All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.microsoft.azure.documentdb.internal.directconnectivity.QuorumReader Maven / Gradle / Ivy

/*
 * Copyright (c) Microsoft Corporation.  All rights reserved.
 */

package com.microsoft.azure.documentdb.internal.directconnectivity;

import java.util.ArrayList;
import java.util.List;

import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.microsoft.azure.documentdb.ConsistencyLevel;
import com.microsoft.azure.documentdb.DocumentClientException;
import com.microsoft.azure.documentdb.internal.AuthorizationTokenProvider;
import com.microsoft.azure.documentdb.internal.DatabaseAccountConfigurationProvider;
import com.microsoft.azure.documentdb.internal.DocumentServiceRequest;

//=================================================================================================================
// Strong read logic:
//=================================================================================================================
//
//              ------------------- PerformPrimaryRead-------------------------------------------------------------
//              |                       ^                                                                         |
//        [RetryOnSecondary]            |                                                                         |
//              |                   [QuorumNotSelected]                                                           |
//             \/                      |                                                                         \/
// Start-------------------------->SecondaryQuorumRead-------------[QuorumMet]-------------------------------->Result
//                                      |                                                                         ^
//                                  [QuorumSelected]                                                              |
//                                      |                                                                         |
//                                      \/                                                                        |
//                                  PrimaryReadBarrier-------------------------------------------------------------
//
//=================================================================================================================
// BoundedStaleness quorum read logic:
//=================================================================================================================
//
//              ------------------- PerformPrimaryRead-------------------------------------------------------------
//              |                       ^                                                                         |
//        [RetryOnSecondary]            |                                                                         |
//              |                   [QuorumNotSelected]                                                           |
//             \/                      |                                                                         \/
// Start-------------------------->SecondaryQuorumRead-------------[QuorumMet]-------------------------------->Result
//                                      |                                                                         ^
//                                  [QuorumSelected]                                                              |
//                                      |                                                                         |
//                                      |                                                                         |
//                                      ---------------------------------------------------------------------------
class QuorumReader {
    // Number of attempts made by the regular read-barrier loop in waitForReadBarrier.
    private static final int MAX_NUMBER_OF_READ_BARRIER_RETRIES = 6;
    // Upper bound on passes of the secondary-read loop in readStrong / readBoundedStaleness.
    private static final int MAX_NUMBER_OF_READ_QUORUM_RETRIES = 6;
    // Pause, in milliseconds, after each failed attempt of the regular read-barrier loop.
    private static final int DELAY_BETWEEN_READ_BARRIER_CALLS_IN_MS = 10;

    // Number of additional barrier attempts made only when a positive global committed LSN is being awaited.
    private static final int MAX_BARRIER_RETRIES_FOR_MULTI_REGION = 30;
    // Pause, in milliseconds, used by those additional attempts once the short-interval attempts are used up.
    private static final int BARRIER_RETRY_INTERVAL_IN_MS_FOR_MULTIREGION = 30;

    // The first MAX_SHORT_BARRIER_RETRIES_FOR_MULTI_REGION additional attempts pause for the short interval below.
    private static final int MAX_SHORT_BARRIER_RETRIES_FOR_MULTI_REGION = 4;
    private static final int SHORT_BARRIER_RETRY_INTERVAL_IN_MS_FOR_MULTI_REGION = 10;

    private final Logger logger = LoggerFactory.getLogger(QuorumReader.class);

    // Collaborators are assigned exactly once, in the constructor, and never replaced afterwards.
    private final StoreReader storeReader;
    private final AuthorizationTokenProvider authorizationTokenProvider;
    private final DatabaseAccountConfigurationProvider configurationProvider;

    /**
     * Creates a quorum reader over the given collaborators.
     *
     * @param storeReader                issues the replica and primary reads
     * @param authorizationTokenProvider handed to {@code BarrierRequestHelper.create} when a barrier request is built
     * @param configurationProvider      consulted for the store consistency policy
     */
    public QuorumReader(StoreReader storeReader,
                        AuthorizationTokenProvider authorizationTokenProvider,
                        DatabaseAccountConfigurationProvider configurationProvider) {
        this.configurationProvider = configurationProvider;
        this.authorizationTokenProvider = authorizationTokenProvider;
        this.storeReader = storeReader;
    }

    /**
     * Performs a read at {@code ConsistencyLevel.Strong}: secondaries are asked first and the primary is
     * only consulted when no quorum could be selected among them.
     *
     * <p>Outcome of each pass, keyed by the kind returned from {@code readQuorum}:
     * <ul>
     *   <li>{@code QuorumMet} - the quorum response is returned.</li>
     *   <li>{@code QuorumSelected} - the selected LSN, store result and global committed LSN are stored on
     *       the request (where {@code readQuorum} looks for them on a later call), the loop ends and a
     *       GONE error is raised.</li>
     *   <li>{@code QuorumNotSelected} - the primary is read once; its answer is returned on success, or
     *       another pass over the secondaries is scheduled when the primary asks for it.</li>
     * </ul>
     *
     * @param request     the read request; mutated with quorum-selection state on {@code QuorumSelected}
     * @param quorumValue number of replicas that must agree
     * @return the store response of the quorum read or of the primary read
     * @throws DocumentClientException with status GONE when the read cannot be completed, or with status
     *                                 INTERNAL_SERVER_ERROR for an unrecognised quorum result kind
     */
    StoreResponse readStrong(DocumentServiceRequest request, int quorumValue) throws DocumentClientException {
        final String primaryAlreadyTriedMessage =
                "Primary read already attempted. Quorum couldn't be selected after retrying on secondaries.";
        final String primaryFailedMessage = "Could not get successful response from ReadPrimary";

        boolean primaryAlreadyTried = false;
        boolean retryOnSecondary = false;
        int passesLeft = QuorumReader.MAX_NUMBER_OF_READ_QUORUM_RETRIES;

        do {
            retryOnSecondary = false;

            // Secondaries only on every pass; the primary is a fallback handled below.
            final ReadQuorumResult secondaryResult =
                    this.readQuorum(request, quorumValue, false /* includePrimary */, ConsistencyLevel.Strong);

            switch (secondaryResult.getQuorumResult()) {
                case QuorumMet: {
                    return secondaryResult.getResponse();
                }

                case QuorumSelected: {
                    // The barrier did not converge. Remember what was selected on the request and leave the loop.
                    this.logger.debug(
                            "Couldn't converge on the LSN {} after primary read barrier with read quorum {} for strong read.",
                            secondaryResult.getSelectedLsn(), quorumValue);
                    request.setQuorumSelectedLSN(secondaryResult.getSelectedLsn());
                    request.setQuorumSelectedStoreResponse(secondaryResult.getStoreReadResult());
                    request.setGlobalCommittedSelectedLSN(secondaryResult.getGlobalCommittedLSN());
                    break;
                }

                case QuorumNotSelected: {
                    // The primary is consulted at most once per call.
                    if (primaryAlreadyTried) {
                        this.logger.warn(primaryAlreadyTriedMessage);
                        throw new DocumentClientException(HttpStatus.SC_GONE, primaryAlreadyTriedMessage);
                    }

                    this.logger.debug("Quorum could not be selected with read quorum of {}", quorumValue);
                    final ReadPrimaryResult primaryResult =
                            this.readPrimary(request, quorumValue, ConsistencyLevel.Strong);

                    if (primaryResult.isSuccessful()) {
                        this.logger.debug("Primary read succeeded");
                        return primaryResult.getResponse();
                    }

                    if (!primaryResult.isShouldRetryOnSecondary()) {
                        this.logger.warn(primaryFailedMessage);
                        throw new DocumentClientException(HttpStatus.SC_GONE, primaryFailedMessage);
                    }

                    this.logger.debug("ReadPrimary did not succeed. Will retry on secondary.");
                    primaryAlreadyTried = true;
                    retryOnSecondary = true;
                    break;
                }

                default: {
                    this.logger.error("Unknown read quorum result {}", secondaryResult.getQuorumResult().toString());
                    throw new DocumentClientException(HttpStatus.SC_INTERNAL_SERVER_ERROR, "Unknown read quorum result.");
                }
            }
        } while (--passesLeft > 0 && retryOnSecondary);

        this.logger.warn("Could not complete read quorum with read quorum value of {}", quorumValue);
        throw new DocumentClientException(HttpStatus.SC_GONE, "Could not complete read quorum.");
    }

    /**
     * Reads from the primary replica and decides whether its answer may be used on its own.
     *
     * <p>The answer is accepted only when the replica set size reported by the primary does not exceed
     * {@code readQuorum}; otherwise the caller is told to go back to the secondaries.
     *
     * @param request          the read request; its force-address-refresh flag is cleared before the read
     * @param readQuorum       the read quorum the reported replica set size is compared against
     * @param consistencyLevel consistency level forwarded to the store reader
     * @return a successful result wrapping the primary's answer, or an unsuccessful result flagged for
     *         retry on secondaries (carrying no store result)
     * @throws DocumentClientException the exception carried by an invalid store result, or a GONE error
     *                                 when the replica set size, LSN or quorum-acked LSN is out of range
     */
    private ReadPrimaryResult readPrimary(DocumentServiceRequest request, int readQuorum, ConsistencyLevel consistencyLevel) throws DocumentClientException {
        request.setForceAddressRefresh(false);
        final StoreReadResult primaryResult = this.storeReader.readPrimary(request, true, consistencyLevel);

        if (!primaryResult.isValid()) {
            throw primaryResult.getException();
        }

        // All three header-derived values must be in range before the result can be trusted.
        final boolean headersUsable = primaryResult.getCurrentReplicaSetSize() > 0
                && primaryResult.getLSN() >= 0
                && primaryResult.getQuorumAckedLSN() >= 0;
        if (!headersUsable) {
            this.logger.warn(
                    "Invalid value received from response header. CurrentReplicaSetSize {}, StoreLSN {}, QuorumAckedLSN {}. Throwing gone exception",
                    primaryResult.getCurrentReplicaSetSize(),
                    primaryResult.getLSN(),
                    primaryResult.getQuorumAckedLSN());
            throw new DocumentClientException(HttpStatus.SC_GONE, "Invalid value received from response header.");
        }

        final boolean replicaSetExceedsQuorum = primaryResult.getCurrentReplicaSetSize() > readQuorum;
        if (!replicaSetExceedsQuorum) {
            return new ReadPrimaryResult(true /*isSuccessful*/, false /*retry on secondaries */, primaryResult, request.getRequestChargeTracker());
        }

        // More replicas exist than the read quorum asks for, so wait for the secondaries instead.
        logger.debug("Unexpected response. Replica Set size is {} which is greater than min value {}",
                primaryResult.getCurrentReplicaSetSize(), readQuorum);
        return new ReadPrimaryResult(false /*isSuccessful*/, true /*retry on secondaries */, null, request.getRequestChargeTracker());
    }

    /**
     * Reads from multiple replicas and classifies the outcome as quorum met, quorum selected or quorum
     * not selected.
     *
     * <p>When the request already carries a quorum-selected store response, the replica read is skipped
     * and only the barrier is repeated against the LSN values saved on the request.
     *
     * @param request          the read request; also the source of any previously saved quorum selection
     * @param readQuorum       number of valid, agreeing responses required
     * @param includePrimary   whether the replica read may include the primary
     * @param consistencyLevel consistency level forwarded to the store reader
     * @return {@code QuorumNotSelected} when fewer than {@code readQuorum} responses are valid or no
     *         response could be selected; {@code QuorumMet} when the responses agree or the barrier
     *         succeeds; {@code QuorumSelected} when the barrier does not converge
     * @throws DocumentClientException propagated from the store reader or the barrier request
     */
    private ReadQuorumResult readQuorum(DocumentServiceRequest request, int readQuorum, boolean includePrimary, ConsistencyLevel consistencyLevel)
            throws DocumentClientException {
        long maxLsn = 0;
        long globalCommittedLSN = -1;
        StoreReadResult highestLsnResult = null;

        if (request.getQuorumSelectedStoreResponse() == null) {
            // Typed list: the elements are iterated as StoreReadResult below.
            List<StoreReadResult> responseResult = this.storeReader.readMultipleReplica(request, includePrimary, readQuorum, consistencyLevel);
            int responseCount = 0;
            for (StoreReadResult result : responseResult) {
                if (result.isValid()) {
                    responseCount++;
                }
            }
            if (responseCount < readQuorum) {
                return new ReadQuorumResult(ReadQuorumResultKind.QuorumNotSelected, -1, -1, null, request.getRequestChargeTracker());
            }

            // either request overrides consistency level with strong,
            // or request does not override and account default consistency level is strong
            boolean isGlobalStrongReadCandidate = ReplicatedResourceClient.GLOBAL_STRONG_ENABLED
                    && this.configurationProvider.getStoreConsistencyPolicy() == ConsistencyLevel.Strong
                    && (request.getOriginalRequestConsistencyLevel() == null
                        || request.getOriginalRequestConsistencyLevel() == ConsistencyLevel.Strong);

            QuorumMetCheckResult isQuorumMetResult = isQuorumMet(responseResult, readQuorum, includePrimary, isGlobalStrongReadCandidate);
            maxLsn = isQuorumMetResult.readLSN;
            globalCommittedLSN = isQuorumMetResult.globalCommittedLSN;
            if (isQuorumMetResult.isQuorumMet) {
                return new ReadQuorumResult(ReadQuorumResultKind.QuorumMet,
                        maxLsn,
                        globalCommittedLSN,
                        isQuorumMetResult.selectedResponse,
                        request.getRequestChargeTracker());
            }
            if (isQuorumMetResult.selectedResponse == null) {
                // isQuorumMet can report "not met" without selecting any response. With nothing selected
                // there is no LSN to barrier on and no response to hand back, so report that no quorum
                // was selected instead of continuing with a null store result.
                return new ReadQuorumResult(ReadQuorumResultKind.QuorumNotSelected, -1, -1, null, request.getRequestChargeTracker());
            }
            highestLsnResult = isQuorumMetResult.selectedResponse;
        } else {
            // A selection from an earlier attempt is stored on the request: reuse it and go straight to the barrier.
            logger.warn("wait to catch up max lsn");
            maxLsn = request.getQuorumSelectedLSN();
            globalCommittedLSN = request.getGlobalCommittedSelectedLSN();
            highestLsnResult = request.getQuorumSelectedStoreResponse();
        }

        // The replicas are not on the same LSN: ping them with a head request (barrier request),
        // retried inside waitForReadBarrier, to see whether a quorum of them reaches the selected LSN.
        logger.debug("Quorum is not met, sending barrier request to replicas");
        DocumentServiceRequest barrierRequest = BarrierRequestHelper.create(request, this.authorizationTokenProvider);
        if (this.waitForReadBarrier(barrierRequest, false, readQuorum, maxLsn, globalCommittedLSN, consistencyLevel)) {
            return new ReadQuorumResult(ReadQuorumResultKind.QuorumMet, maxLsn, globalCommittedLSN, highestLsnResult, request.getRequestChargeTracker());
        }

        this.logger.warn("Quorum selected with maxLsn {}", maxLsn);
        return new ReadQuorumResult(ReadQuorumResultKind.QuorumSelected, maxLsn, globalCommittedLSN, highestLsnResult, request.getRequestChargeTracker());
    }

    /**
     * Inspects a batch of replica responses, selects the valid response with the highest LSN and decides
     * whether the batch on its own already satisfies the read quorum.
     *
     * @param readResponses      responses returned by the replica read; invalid ones are ignored
     * @param readQuorum         number of agreeing responses required
     * @param isPrimaryIncluded  not consulted by this method
     * @param isGlobalStrongRead whether the global committed LSN must also be checked; only takes effect
     *                           when at least one valid response reports a positive number of read regions
     * @return the verdict together with the read LSN, the global committed LSN to wait for (-1 when not
     *         applicable) and the selected response; the selected response is {@code null} when there is
     *         no valid response or no valid response carries an LSN greater than zero
     */
    private QuorumMetCheckResult isQuorumMet(
            List<StoreReadResult> readResponses,
            int readQuorum,
            boolean isPrimaryIncluded,
            boolean isGlobalStrongRead) {

        long maxLsn = 0;
        long minLsn = Long.MAX_VALUE;

        // check if no response is valid
        int validResponsesCount = 0;
        long numberOfReadRegions = 0;
        long maxGlobalCommittedLSN = 0;
        for (StoreReadResult readResponse : readResponses) {
            if (readResponse.isValid()) {
                validResponsesCount++;
                numberOfReadRegions = Math.max(numberOfReadRegions, readResponse.getNumberOfReadRegions());
                maxGlobalCommittedLSN = Math.max(maxGlobalCommittedLSN, readResponse.getGlobalCommittedLSN());
            }
        }
        if (validResponsesCount == 0) {
            return new QuorumMetCheckResult(false, 0, -1, null);
        }

        boolean checkForGlobalStrong = isGlobalStrongRead && numberOfReadRegions > 0;
        StoreReadResult selectedResponse = null;

        // checks if quorum is met and also updates maxLsn and the corresponding response as highestLsnResult
        // Pick any R replicas in the response and check if they are at the same LSN
        int replicaCountMaxLsn = 0;
        for (StoreReadResult response : readResponses) {

            if (!response.isValid()) {
                continue;
            }

            if (response.getLSN() == maxLsn) {
                replicaCountMaxLsn++;

            } else if (response.getLSN() > maxLsn) {
                replicaCountMaxLsn = 1;
                maxLsn = response.getLSN();
                selectedResponse = response;
            }

            if (response.getLSN() < minLsn) {
                minLsn = response.getLSN();
            }
        }

        if (selectedResponse == null) {
            // A response is only selected when its LSN exceeds the initial maxLsn of 0. If every valid
            // response reports an LSN <= 0 nothing was selected, and dereferencing selectedResponse below
            // would throw a NullPointerException. Report the same "nothing usable" result as above.
            return new QuorumMetCheckResult(false, 0, -1, null);
        }

        // An item LSN of -1 is treated as "not available"; otherwise the read LSN is capped by it.
        long readLsn = selectedResponse.getItemLSN() == -1
                ? maxLsn
                : Math.min(selectedResponse.getItemLSN(), maxLsn);
        long globalCommittedLSN = checkForGlobalStrong ? readLsn : -1;

        logger.info("QuorumReader: MaxLSN {} ReplicaCountMaxLSN {} bCheckGlobalStrong {} MaxGlobalCommittedLSN {} " +
                        "NumberOfReadRegions {} SelectedResponseItemLSN {}",
                maxLsn, replicaCountMaxLsn, checkForGlobalStrong, maxGlobalCommittedLSN,
                numberOfReadRegions, selectedResponse.getItemLSN());

        // quorum is met if one of the following conditions are satisfied:
        // 1. readLsn is greater than zero
        //    AND the number of responses that have the same LSN as
        //          the selected response is greater than or equal to the read quorum
        //    AND if applicable, the max GlobalCommittedLSN of all responses is greater than or equal to
        //          the lsn of the selected response.

        // 2. if the request is a point-read request,
        //    AND there are more than one response in the readResponses
        //    AND the LSN of the returned resource of the selected response is less than or equal to
        //          the minimum lsn of the all the responses,
        //    AND if applicable, the LSN of the returned resource of the selected response is less than or equal to
        //          the minimum globalCommittedLsn of all the responses.
        //    This means that the returned resource is old enough to have been committed by at least all the received responses,
        //    which should be larger than or equal to the read quorum, which therefore means we have strong consistency.
        //
        // NOTE(review): for condition 2 the code below checks validResponsesCount >= readQuorum (not
        // "more than one response") and compares against the MAX global committed LSN (not the minimum);
        // confirm which of the comment or the code states the intended rule.

        boolean isQuorumMet = false;

        if ((readLsn > 0 && replicaCountMaxLsn >= readQuorum) &&
                (!checkForGlobalStrong || maxGlobalCommittedLSN >= maxLsn))
        {
            isQuorumMet = true;
        }

        if(!isQuorumMet && validResponsesCount >= readQuorum && selectedResponse.getItemLSN() != -1 &&
                (minLsn != Long.MAX_VALUE && selectedResponse.getItemLSN() <= minLsn) &&
                (!checkForGlobalStrong || (selectedResponse.getItemLSN() <= maxGlobalCommittedLSN)))
        {
            isQuorumMet = true;
        }

        return new QuorumMetCheckResult(isQuorumMet, readLsn, globalCommittedLSN, selectedResponse);
    }

    /**
     * Tests whether any response in the list reports a global committed LSN equal to or higher than the
     * barrier LSN.
     *
     * @param responses responses to inspect; validity of the individual responses is not checked here
     * @param lsn       the barrier LSN to compare against
     * @return {@code true} as soon as one response satisfies the comparison, {@code false} otherwise
     *         (including for an empty list)
     */
    private boolean matchGlobalCommitted(List<StoreReadResult> responses, long lsn) {
        for (StoreReadResult result : responses) {
            if (result.getGlobalCommittedLSN() >= lsn) {
                return true;
            }
        }
        return false;
    }

    /**
     * Repeatedly sends the barrier request to the replicas until a read quorum of them reports an LSN at
     * or beyond {@code readBarrierLsn} (and, when a positive {@code globalCommittedLSN} is given, at least
     * one of them reports a global committed LSN at or beyond it), or the retry budget is used up.
     *
     * <p>Two phases are run: a regular loop of {@code MAX_NUMBER_OF_READ_BARRIER_RETRIES} attempts, then,
     * only when {@code globalCommittedLSN > 0}, up to {@code MAX_BARRIER_RETRIES_FOR_MULTI_REGION} further
     * attempts whose first few pauses are shorter than the later ones.
     *
     * @param barrierRequest     the barrier (head) request to send
     * @param allowPrimary       whether the replica read may include the primary
     * @param readQuorum         number of replicas that must have caught up
     * @param readBarrierLsn     LSN the replicas must have reached
     * @param globalCommittedLSN global committed LSN to wait for; values {@code <= 0} disable that check
     * @param consistencyLevel   consistency level forwarded to the store reader
     * @return {@code true} when the barrier condition was observed, {@code false} when all attempts failed
     * @throws DocumentClientException propagated from the store reader
     * @throws IllegalStateException   when the thread is interrupted while pausing between attempts; the
     *                                 interrupt flag is restored before this is thrown
     */
    private boolean waitForReadBarrier(DocumentServiceRequest barrierRequest,
                                       boolean allowPrimary,
                                       int readQuorum,
                                       long readBarrierLsn,
                                       long globalCommittedLSN,
                                       ConsistencyLevel consistencyLevel) throws DocumentClientException {
        int readBarrierRetryCount = QuorumReader.MAX_NUMBER_OF_READ_BARRIER_RETRIES;
        int readBarrierRetryCountMultiRegion = QuorumReader.MAX_BARRIER_RETRIES_FOR_MULTI_REGION;

        do {
            barrierRequest.setForceAddressRefresh(false);
            List<StoreReadResult> responses = this.storeReader.readMultipleReplica(barrierRequest, allowPrimary, readQuorum, consistencyLevel);
            // Collected only for the diagnostic message logged when this attempt fails.
            List<Long> responseLSNs = new ArrayList<>(responses.size());
            int validLsnCount = 0;
            for (StoreReadResult storeReadResult : responses) {
                if (storeReadResult.getLSN() >= readBarrierLsn) {
                    validLsnCount++;
                }
                responseLSNs.add(storeReadResult.getLSN());
            }

            if (validLsnCount >= readQuorum &&
                    (!(globalCommittedLSN > 0) || matchGlobalCommitted(responses, globalCommittedLSN))) {
                this.logger.debug("secondaries barrier requeest succeeded");
                return true;
            }

            this.logger.warn(
                            "Barrier request failed with validLsnCount {}, response LSNs <{}> and readQuorum {} with remaining retries {} and allow primary is {}",
                            validLsnCount,
                            StringUtils.join(responseLSNs, ','),
                            readQuorum,
                            readBarrierRetryCount,
                            allowPrimary);

            pauseBeforeNextBarrier(QuorumReader.DELAY_BETWEEN_READ_BARRIER_CALLS_IN_MS);
        } while (--readBarrierRetryCount > 0);

        // we will go into global strong read barrier mode for global strong requests after regular barrier calls have been exhausted.
        if (globalCommittedLSN > 0) {
            while (readBarrierRetryCountMultiRegion-- > 0) {
                List<StoreReadResult> responses = this.storeReader.readMultipleReplica(barrierRequest, allowPrimary, readQuorum, consistencyLevel);
                int validLsnCount = 0;
                for (StoreReadResult storeReadResult : responses) {
                    if (storeReadResult.getLSN() >= readBarrierLsn) {
                        validLsnCount++;
                    }
                }

                if (validLsnCount >= readQuorum &&
                        matchGlobalCommitted(responses, globalCommittedLSN)) {
                    return true;
                }

                // The first MAX_SHORT_BARRIER_RETRIES_FOR_MULTI_REGION attempts back off briefly, later ones longer.
                boolean shortRetriesExhausted =
                        (QuorumReader.MAX_BARRIER_RETRIES_FOR_MULTI_REGION - readBarrierRetryCountMultiRegion) >
                                QuorumReader.MAX_SHORT_BARRIER_RETRIES_FOR_MULTI_REGION;
                pauseBeforeNextBarrier(shortRetriesExhausted
                        ? QuorumReader.BARRIER_RETRY_INTERVAL_IN_MS_FOR_MULTIREGION
                        : QuorumReader.SHORT_BARRIER_RETRY_INTERVAL_IN_MS_FOR_MULTI_REGION);
            }
        }

        return false;
    }

    /**
     * Sleeps for the given number of milliseconds between barrier attempts. On interruption the thread's
     * interrupt flag is restored before the interruption is rethrown as an {@link IllegalStateException}.
     */
    private static void pauseBeforeNextBarrier(long delayInMs) {
        try {
            Thread.sleep(delayInMs);
        } catch (InterruptedException e) {
            // Catching InterruptedException clears the flag; set it again so callers can still observe it.
            Thread.currentThread().interrupt();
            throw new IllegalStateException("Delay thread interrupted with exception: ", e);
        }
    }

    /**
     * Performs a read at {@code ConsistencyLevel.BoundedStaleness}: secondaries are asked first and the
     * primary is only consulted when no quorum could be selected among them.
     *
     * <p>Outcome of each pass, keyed by the kind returned from {@code readQuorum}:
     * <ul>
     *   <li>{@code QuorumMet} - the quorum response is returned.</li>
     *   <li>{@code QuorumSelected} - the selected store result and LSN are stored on the request, the
     *       loop ends and a GONE error is raised; no barrier is issued against the primary.</li>
     *   <li>{@code QuorumNotSelected} - the primary is read once; its answer is returned on success, or
     *       another pass over the secondaries is scheduled when the primary asks for it.</li>
     * </ul>
     *
     * @param request         the read request; mutated with quorum-selection state on {@code QuorumSelected}
     * @param readQuorumValue number of replicas that must agree
     * @return the store response of the quorum read or of the primary read
     * @throws DocumentClientException with status GONE when the read cannot be completed, or with status
     *                                 INTERNAL_SERVER_ERROR for an unrecognised quorum result kind
     */
    StoreResponse readBoundedStaleness(DocumentServiceRequest request, int readQuorumValue) throws DocumentClientException {
        final String primaryAlreadyTriedMessage =
                "Primary read already attempted. Quorum could not be selected after retrying on secondaries.";
        final String conflictingFlagsMessage = "PrimaryResult has both Successful and ShouldRetryOnSecondary flags set";
        final String primaryFailedMessage = "Could not get successful response from ReadPrimary";

        boolean primaryAlreadyTried = false;
        boolean retryOnSecondary = false;
        int passesLeft = QuorumReader.MAX_NUMBER_OF_READ_QUORUM_RETRIES;

        do {
            logger.warn("remaining retries {}", passesLeft);
            final ReadQuorumResult secondaryResult =
                    readQuorum(request, readQuorumValue, false, ConsistencyLevel.BoundedStaleness);
            retryOnSecondary = false;

            switch (secondaryResult.getQuorumResult()) {
                case QuorumMet: {
                    this.logger.debug("ReadQuorum successful");
                    return secondaryResult.getResponse();
                }

                case QuorumSelected: {
                    // We do not perform the read barrier on Primary for BoundedStaleness as it has a
                    // potential to be always caught up in case of async replication
                    this.logger.warn(
                            "Could not converge on the LSN {} after read barrier with read quorum {}. Will not perform barrier call on Primary for BoundedStaleness",
                            secondaryResult.getSelectedLsn(), readQuorumValue);

                    request.setQuorumSelectedStoreResponse(secondaryResult.getStoreReadResult());
                    request.setQuorumSelectedLSN(secondaryResult.getSelectedLsn());
                    break;
                }

                case QuorumNotSelected: {
                    // The primary is consulted at most once per call.
                    if (primaryAlreadyTried) {
                        this.logger.warn(primaryAlreadyTriedMessage);
                        throw new DocumentClientException(HttpStatus.SC_GONE, primaryAlreadyTriedMessage);
                    }

                    this.logger.debug("Quorum could not be selected with read quorum of {}", readQuorumValue);
                    final ReadPrimaryResult primaryResult =
                            readPrimary(request, readQuorumValue, ConsistencyLevel.BoundedStaleness);

                    final boolean succeeded = primaryResult.isSuccessful();
                    final boolean wantsSecondaryRetry = primaryResult.isShouldRetryOnSecondary();

                    if (succeeded) {
                        if (!wantsSecondaryRetry) {
                            this.logger.debug("ReadPrimary successful");
                            return primaryResult.getResponse();
                        }
                        // Contradictory flags: logged, and fatal only when assertions are enabled.
                        this.logger.error(conflictingFlagsMessage);
                        assert false : conflictingFlagsMessage;
                    } else if (wantsSecondaryRetry) {
                        this.logger.debug("ReadPrimary did not succeed. Will retry on secondary.");
                        primaryAlreadyTried = true;
                        retryOnSecondary = true;
                    } else {
                        this.logger.warn(primaryFailedMessage);
                        throw new DocumentClientException(HttpStatus.SC_GONE, primaryFailedMessage);
                    }
                    break;
                }

                default: {
                    this.logger.warn("Unknown ReadQuorum result {}", secondaryResult.getQuorumResult().toString());
                    throw new DocumentClientException(HttpStatus.SC_INTERNAL_SERVER_ERROR, "Unknown ReadQuorum");
                }
            }
        } while (--passesLeft > 0 && retryOnSecondary);

        this.logger.error("Could not complete read quourm with read quorum value of {}", readQuorumValue);
        throw new DocumentClientException(HttpStatus.SC_GONE, String.format("Could not complete read quourm with read quorum value of %d", readQuorumValue));
    }

    /**
     * Immutable result of {@code isQuorumMet}: the verdict plus the values the caller needs in order to
     * continue with a barrier request when the quorum was not met.
     */
    private static class QuorumMetCheckResult {
        /** Whether the inspected responses already satisfy the read quorum. */
        final boolean isQuorumMet;
        /** LSN the read is pinned to. */
        final long readLSN;
        /** Global committed LSN to wait for, or -1 when that check does not apply. */
        final long globalCommittedLSN;
        /** The selected response; {@code null} when none could be selected. */
        final StoreReadResult selectedResponse;

        QuorumMetCheckResult(boolean isQuorumMet, long readLSN, long globalCommittedLSN, StoreReadResult selectedResponse) {
            this.isQuorumMet = isQuorumMet;
            this.readLSN = readLSN;
            this.globalCommittedLSN = globalCommittedLSN;
            this.selectedResponse = selectedResponse;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy