// Copyright 2021-present StarRocks, Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.staros.shard;
import com.google.common.base.Preconditions;
import com.staros.exception.ExceptionCode;
import com.staros.exception.StarException;
import com.staros.proto.PlacementPolicy;
import com.staros.schedule.ReplicaWorkerInvertIndex;
import com.staros.schedule.ScheduleScorer;
import com.staros.schedule.Scheduler;
import com.staros.schedule.select.FirstNSelector;
import com.staros.service.ServiceManager;
import com.staros.util.AbstractServer;
import com.staros.util.Config;
import com.staros.worker.Worker;
import com.staros.worker.WorkerGroup;
import com.staros.worker.WorkerManager;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
public class ShardChecker extends AbstractServer {
private static final Logger LOG = LogManager.getLogger(ShardChecker.class);
private final ServiceManager serviceManager;
private final WorkerManager workerManager;
private final Scheduler scheduler;
// TODO: use thread pool
private final Thread checkThread;
private final FirstNSelector selector;
private final long coolDownMs = 60 * 1000; // 60 seconds
private final AtomicLong allows = new AtomicLong();
public ShardChecker(ServiceManager serviceManager, WorkerManager workerManager, Scheduler scheduler) {
this.serviceManager = serviceManager;
this.workerManager = workerManager;
this.scheduler = scheduler;
this.selector = new FirstNSelector();
this.checkThread = new Thread(this::runCheckThread);
}
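// Illustrative wiring sketch (not part of this file; assumes AbstractServer exposes start()/stop()
// lifecycle methods that delegate to doStart()/doStop()):
//   ShardChecker checker = new ShardChecker(serviceManager, workerManager, scheduler);
//   checker.start(); // spawns the background check thread
//   ...
//   checker.stop();  // interrupts and joins the thread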
@Override
public void doStart() {
checkThread.start();
}
@Override
public void doStop() {
try {
checkThread.interrupt();
checkThread.join();
} catch (InterruptedException e) {
LOG.warn("join shard checker thread failed! {}", e.getMessage());
}
}
/**
* Sampled logging: avoids overwhelming the log file with overly frequent messages.
*
* @param level log level
* @param message log message
*/
private void sampleLogging(Level level, String message) {
long now = System.currentTimeMillis();
long allowed = allows.get();
if (now > allowed && allows.compareAndSet(allowed, now + coolDownMs)) {
LOG.log(level, message);
}
}
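// Example (illustrative): with coolDownMs = 60_000, the first call that passes the time check logs
// its message and pushes the gate 60 seconds into the future; any call arriving before that point
// is silently dropped. The compareAndSet guarantees that at most one thread wins the logging slot
// when several race past the time check concurrently.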
private void runCheckThread() {
while (isRunning()) {
if (Config.DISABLE_BACKGROUND_SHARD_SCHEDULE_CHECK) {
// Reduce logging frequency: the check interval is 10s, but the sampled logging fires at most once per minute.
sampleLogging(Level.WARN,
"DISABLE_BACKGROUND_SHARD_SCHEDULE_CHECK is turned on. Shard balancing is disabled!");
} else {
LOG.debug("running shard check once.");
for (String serviceId : serviceManager.getServiceIdSet()) {
shardHealthCheckForService(serviceId);
}
// TODO: shard balance inside a shard group, does not need to do it every loop
for (String serviceId : serviceManager.getServiceIdSet()) {
shardGroupBalanceCheckForService(serviceId);
}
}
try {
Thread.sleep(Config.SHARD_CHECKER_LOOP_INTERVAL_SEC * 1000L);
} catch (InterruptedException e) {
LOG.info("shard checker thread interrupted! {}", e.getMessage());
}
}
}
/**
* check the shards inside a service
*
* @param serviceId the target service id
*/
protected void shardHealthCheckForService(String serviceId) {
LOG.debug("Start shard replica health check for service: {}", serviceId);
ShardManager shardManager = serviceManager.getShardManager(serviceId);
if (shardManager == null) {
LOG.info("ShardManager not exist for service {}, skip the healthy check.", serviceId);
return;
}
// Get a snapshot of shard ids, new added shards will be checked in next round.
List<Long> shardIds = shardManager.getAllShardIds();
for (long shardId : shardIds) {
Shard shard = shardManager.getShard(shardId);
if (shard == null) {
LOG.info("shard not exist for service {}, shard {}, skip it.", serviceId, shardId);
continue;
}
try {
shardHealthCheck(shard);
} catch (StarException exception) {
LOG.info("Got exception during processing service:{}, shard:{} health check, skip it.",
serviceId, shardId, exception);
}
}
}
/**
* Check the given shard's replica status: remove dead replicas and add missing ones. The task is expected to
* run with low priority.
*
* @param shard the shard to be checked
*/
private void shardHealthCheck(Shard shard) throws StarException {
// A shard is NOT expected to have more than `nHealthyWeight` unhealthy replicas.
final int nHealthyWeight = 10000;
List<Long> replicas = shard.getReplicaWorkerIds();
// Divide replicas into healthy and unhealthy replicas, grouped by workerGroupId.
// workerGroup -> numOfReplicas (nHealthyWeight * healthy + unhealthy)
Map<Long, Integer> healthy = new HashMap<>();
// worker ids of replicas whose worker can no longer be found
List<Long> unknownWorkers = new ArrayList<>();
replicas.forEach(workerId -> {
Worker w = workerManager.getWorker(workerId);
if (w == null) {
unknownWorkers.add(workerId);
} else {
// Just a lazy way to encode numOfHealthyWorkers and numOfDeadWorkers into a single counter
final int delta = w.isAlive() ? nHealthyWeight : 1;
// A workerGroup that has unhealthy replicas but zero healthy replicas MUST still be added into the
// healthy map, so that shard replicas can be scheduled to that workerGroup again.
// Otherwise the following scenario is possible:
// a) a shard has healthy replicas in a worker group.
// b) the replicas become unhealthy and no healthy replica is left (e.g. the worker is dead).
// c) the unhealthy replicas are removed and scheduling of new replicas is triggered.
// d) scheduling fails, so no replica exists in the worker group (e.g. no available workers).
// e) new workers are added into the worker group.
// f) the shard replica check would skip this worker group even though workers are now available.
healthy.compute(w.getGroupId(), (key, value) -> value == null ? delta : value + delta);
}
});
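// Encoding example (illustrative): a worker group holding 2 alive replicas and 1 dead replica ends
// up with the value 2 * nHealthyWeight + 1 = 20001, which is decoded below as
// nHealthy = 20001 / 10000 = 2 and nUnhealthy = 20001 % 10000 = 1.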
// TODO: sophisticated control of removal by adding more worker states: UP/DISCONNECTED/DOWN/CONNECTING
// * DEAD workers can be removed directly
// * DISCONNECTED/CONNECTING workers are unhealthy, but may become functional again if given more time.
// Check whether the remaining replicas meet the expected replica count.
String serviceId = shard.getServiceId();
long shardId = shard.getShardId();
if (!unknownWorkers.isEmpty()) {
List<Long> shardIds = Collections.nCopies(1, shardId);
for (long workerId : unknownWorkers) {
try {
scheduler.scheduleAsyncRemoveFromWorker(serviceId, shardIds, workerId);
} catch (StarException exception) {
// log a message and continue
LOG.info("Fail to schedule a remove-shard request for service:{}, shard:{}, worker:{}, error:",
serviceId, shardId, workerId, exception);
}
}
}
int expectedNum = shard.getExpectedReplicaCount();
for (Map.Entry<Long, Integer> entry : healthy.entrySet()) {
long workerGroupId = entry.getKey();
int nHealthy = entry.getValue() / nHealthyWeight;
int nUnhealthy = entry.getValue() % nHealthyWeight;
// Compare against the expected replica count:
// 1. request to add replicas up to the expected count
// 2. request to remove redundant replicas down to the expected count
try {
if (nHealthy < expectedNum) {
// give a try to schedule the shard again to the workerGroup, let scheduler choose a proper worker.
LOG.debug("Request schedule new replicas for service:{}, shard:{}, workerGroup:{},"
+ " expected num: {}, actual num: {}",
serviceId, shardId, workerGroupId, expectedNum, nHealthy);
scheduler.scheduleAsyncAddToGroup(serviceId, shardId, workerGroupId);
}
if (nHealthy + nUnhealthy > expectedNum) {
LOG.debug("Remove redundant replicas for service:{}, shard:{}, workerGroup:{},"
+ " expected num: {}, actual num: {}",
serviceId, shardId, workerGroupId, expectedNum, nHealthy + nUnhealthy);
scheduler.scheduleAsyncRemoveFromGroup(serviceId, shardId, workerGroupId);
}
} catch (StarException exception) {
LOG.info("Fail to schedule tasks to scheduler. error:", exception);
}
}
}
/**
* Check and balance shard group if needed
*
* @param serviceId target service id
*/
protected void shardGroupBalanceCheckForService(String serviceId) {
LOG.debug("Start shard group balance health check for service: {}", serviceId);
ShardManager shardManager = serviceManager.getShardManager(serviceId);
if (shardManager == null) {
LOG.info("ShardManager not exist for service {}, skip the shard group balance check.", serviceId);
return;
}
// Get a snapshot of shard group ids, new created shard group will be checked in next round.
List<Long> shardGroupIds = shardManager.getAllShardGroupIds();
for (long groupId : shardGroupIds) {
ShardGroup group = shardManager.getShardGroup(groupId);
if (group == null) {
LOG.info("shard group {} not exist for service {}, skip it.", groupId, serviceId);
continue;
}
if (group.getShardIds().isEmpty()) {
LOG.debug("empty shard group {} in service {}. skip it!", groupId, serviceId);
continue;
}
try {
PlacementPolicy policy = group.getPlacementPolicy();
switch (policy) {
case PACK:
balancePackShardGroup(shardManager, group);
break;
case SPREAD:
balanceSpreadShardGroup(shardManager, group);
break;
case EXCLUDE:
balanceExcludeShardGroup(shardManager, group);
break;
case NONE:
case RANDOM:
default:
break;
}
} catch (StarException exception) {
LOG.info("Got exception during processing service:{}, shardgroup:{} balance check, skip it.",
serviceId, groupId, exception);
}
}
}
private void balanceExcludeShardGroup(ShardManager shardManager, ShardGroup group) {
// (A EXCLUDE B) && (B EXCLUDE C) => (A EXCLUDE C), NO!
ReplicaWorkerInvertIndex index = new ReplicaWorkerInvertIndex();
index.buildFrom(workerManager, shardManager, group);
for (long workerGroupId : index.getAllWorkerGroupIds()) {
WorkerGroup workerGroup = workerManager.getWorkerGroup(group.getServiceId(), workerGroupId);
if (workerGroup == null) {
continue;
}
List<Long> workerIds = workerGroup.getAllWorkerIds(true);
if (workerIds.size() < 2) {
LOG.info("worker group:{} only has {} alive workers. Skip the balance check.",
workerGroupId, workerIds.size());
continue;
}
// Divide workers into two sets, HAVE replicas v.s. NO replicas
// move replicas from HAVE replicas to NO replicas.
// DON'T try to move around for following case:
// workerA: (X, Y, Z), workerB: (W) --> workerA: (X, Y), workerB: (Z, W)
// It breaks the EXCLUDE rule, makes no sense to balance around.
List<Long> srcWorkerIds = new ArrayList<>();
List<Long> tgtWorkerIds = new ArrayList<>();
for (long workerId : workerIds) {
if (index.getReplicaShardList(workerId).isEmpty()) {
tgtWorkerIds.add(workerId);
} else {
srcWorkerIds.add(workerId);
}
}
if (tgtWorkerIds.isEmpty()) {
// no available workers to move replicas to
return;
}
// Sort source workers by their replica count in descending order
srcWorkerIds.sort(
(o1, o2) -> Integer.compare(index.getReplicaShardList(o2).size(), index.getReplicaShardList(o1).size()));
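// Example (illustrative): if workerA hosts replicas (X, Y, Z) of this group and workerB hosts none,
// workerA is a source and workerB a target, so one of X/Y/Z is moved to workerB. Once every source
// hosts at most one replica of the group, the EXCLUDE constraint is already satisfied and the loop
// below stops early.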
ScheduleScorer tgtScore = new ScheduleScorer(tgtWorkerIds);
tgtScore.apply(workerManager);
Iterator<Long> srcIt = srcWorkerIds.iterator();
while (srcIt.hasNext() && !tgtScore.isEmpty()) {
// srcWorkerId: least score, targetWorkerId: most score, move from srcWorkerId -> targetWorkerId
long srcWorkerId = srcIt.next();
List<Long> tgtIdList = tgtScore.selectHighEnd(selector, 1);
if (tgtIdList.isEmpty()) {
break;
}
long tgtWorkerId = tgtIdList.get(0);
List<Long> candidates = new ArrayList<>(index.getReplicaShardList(srcWorkerId));
if (candidates.size() <= 1) {
// Sources are sorted by replica count in descending order, so no remaining source has more than one replica either. Nothing left to move.
break;
}
long selected = candidates.get(0);
String serviceId = group.getServiceId();
try {
LOG.info("[ExcludeGroup] Try to balance shard:{} (service:{}) replica from worker:{} => worker:{}",
selected, serviceId, srcWorkerId, tgtWorkerId);
scheduler.scheduleAddToWorker(serviceId, selected, tgtWorkerId);
// remove tgtWorkerId from scorer
tgtScore.remove(tgtWorkerId);
// only submit the asyncRemoveFromWorker if AddToWorker succeeds.
scheduler.scheduleAsyncRemoveFromWorker(
serviceId, Collections.nCopies(1, selected), srcWorkerId);
} catch (Exception exception) {
LOG.info("[ExcludeGroup] Fail to balance shard:{} in service:{}, form worker:{} to worker:{}. Error:",
selected, serviceId, srcWorkerId, tgtWorkerId, exception);
}
}
}
}
private void balancePackShardGroup(ShardManager shardManager, ShardGroup group) {
// (A PACK B) && (B PACK C) => (A PACK C), YES!
// TODO: mark the PACK shard group UNSTABLE?
// Recursively find all PACK shard groups that are connected through shared shards.
List<ShardGroup> packGroups = new ArrayList<>();
List<Shard> packShards = new ArrayList<>();
int replicaNum = collectAllShardsAndGroupsRecursively(shardManager, group, packGroups, packShards);
LOG.info("PACK groups balance: groups:{}, shards:{}, replicaNum:{}",
packGroups.size(), packShards.size(), replicaNum);
Optional<Long> minGroupId = packGroups.stream().map(ShardGroup::getGroupId).min(Comparator.naturalOrder());
Preconditions.checkState(minGroupId.isPresent());
if (minGroupId.get() < group.getGroupId()) {
// This closure has already been handled by the pass for the group with the minimal id.
return;
}
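// Example (illustrative): if PACK groups 10, 12 and 15 are transitively connected through shared
// shards, each of the three per-group invocations collects the same closure, but only the
// invocation whose current group id equals the minimal id (10) continues past this point; the
// others return here to avoid doing the same work three times.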
ReplicaWorkerInvertIndex index = new ReplicaWorkerInvertIndex();
packShards.forEach(x -> index.addReplicas(workerManager, x));
for (long workerGroupId : index.getAllWorkerGroupIds()) {
WorkerGroup workerGroup = workerManager.getWorkerGroup(group.getServiceId(), workerGroupId);
if (workerGroup == null) {
continue;
}
List<Long> workerIds = workerGroup.getAllWorkerIds(true);
if (workerIds.size() < 2) {
LOG.info("worker group:{} only has {} alive workers. Skip the balance check.",
workerGroup.getGroupId(), workerIds.size());
continue;
}
if (workerIds.size() < replicaNum) {
// Can't fulfill the requirement anyway.
LOG.info("Worker group:{} only has {} alive workers. Shard in PACK shard group requires {} replicas. Skip it.",
workerGroupId, workerIds.size(), replicaNum);
continue;
}
balancePackGroupInSingleWorkerGroup(workerIds, group, replicaNum, packGroups, index);
}
}
/**
* Collect all shards and PACK shard groups starting from a specific shard group recursively
*
* @param shardManager shard manager
* @param startGroup the shard group to start the visit
* @param groups result group list
* @param shards result shard list
* @return the expected replica number shared by all collected shards
* @throws StarException thrown when the replica number check fails
*/
private int collectAllShardsAndGroupsRecursively(ShardManager shardManager, ShardGroup startGroup,
List<ShardGroup> groups, List<Shard> shards) throws StarException {
int replica = -1;
List<ShardGroup> todoGroup = new ArrayList<>();
todoGroup.add(startGroup);
List<Shard> blockList = new ArrayList<>();
while (!todoGroup.isEmpty()) {
List<ShardGroup> newGroups = new ArrayList<>();
for (ShardGroup grp : todoGroup) {
if (groups.contains(grp)) { // group already checked
continue;
}
groups.add(grp);
for (long shardId : grp.getShardIds()) {
Shard shard = shardManager.getShard(shardId);
if (shard == null || shards.contains(shard) || blockList.contains(shard)) {
// the shard doesn't exist or is already checked
continue;
}
if (replica == -1) {
replica = shard.getExpectedReplicaCount();
} else if (replica != shard.getExpectedReplicaCount()) {
throw new StarException(ExceptionCode.INTERNAL, String.format(
"Inconsistent shard replica number in PACK group, expected number:%d, actual number:%d",
replica, shard.getExpectedReplicaCount()));
}
boolean hasPrecedenceGroup = false;
for (long gid : shard.getGroupIds()) {
ShardGroup grp2 = shardManager.getShardGroup(gid);
if (grp2 != null) {
if (grp2.getPlacementPolicy() == PlacementPolicy.PACK) {
newGroups.add(grp2);
} else if (grp2.getPlacementPolicy().getNumber() > PlacementPolicy.PACK.getNumber() &&
grp2.getShardIds().size() > 1) {
// skip the shard and let the higher-precedence (e.g. EXCLUDE) group adjust the distribution.
hasPrecedenceGroup = true;
}
}
}
if (hasPrecedenceGroup) {
blockList.add(shard);
} else {
shards.add(shard);
}
}
}
todoGroup = newGroups;
}
return replica;
}
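// Walk-through of the collection above (illustrative): PACK group A = {s1, s2} and PACK group B =
// {s2, s3} share shard s2. Starting from A, s1 and s2 are collected and B is queued through s2, so
// s3 is collected in the next round. A shard that additionally belongs to a higher-precedence group
// (e.g. EXCLUDE) with more than one shard is placed on the block list and left for that group to
// arrange, instead of being packed here.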
private void balancePackGroupInSingleWorkerGroup(List<Long> workerIds, ShardGroup currentGroup, int replicaNum,
List<ShardGroup> groups, ReplicaWorkerInvertIndex index) {
ScheduleScorer scorer = new ScheduleScorer(workerIds);
List<Long> workersWithReplicas = new ArrayList<>();
for (long workerId : workerIds) {
int numReplicas = index.getReplicaShardList(workerId).size();
if (numReplicas == 0) {
continue;
}
workersWithReplicas.add(workerId);
scorer.apply(PlacementPolicy.PACK, Collections.nCopies(numReplicas, workerId));
}
if (workersWithReplicas.size() == replicaNum) {
// ALL DONE: replicas already live on exactly `replicaNum` workers; the shard health checker will take care of any missing replicas.
return;
}
scorer.apply(workerManager);
List<Long> selectedWorkers = scorer.selectHighEnd(selector, replicaNum);
if (selectedWorkers.size() != replicaNum) {
// defensive coding, right now it is impossible as long as workerIds.size() >= replicaNum
LOG.info("Failed to select {} workers from candidates while doing shard group:{} balance check, skip it!",
replicaNum, currentGroup.getGroupId());
return;
}
LOG.debug("[PackGroup] shardGroup: {}. Existing workers with replica: {}, selected tgargetWorkers: {}",
currentGroup.getGroupId(), workersWithReplicas, selectedWorkers);
List<Long> existShardIds = new ArrayList<>();
workersWithReplicas.forEach(x -> existShardIds.addAll(index.getReplicaShardList(x)));
List<Long> validTargetShardIdList = new ArrayList<>();
for (ShardGroup packGroup : groups) {
// skip the shard groups that have no replicas in this worker group at all.
if (packGroup.getShardIds().stream().anyMatch(existShardIds::contains)) {
validTargetShardIdList.addAll(packGroup.getShardIds());
}
}
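// Example (illustrative): with replicaNum = 3 and replicas currently spread over workers
// {w1, w2, w5, w7}, suppose the scorer selects {w1, w2, w3} as targets. Each selected worker first
// receives the group shards it is missing, then the replicas left on w5 and w7 (no longer selected)
// are removed asynchronously below.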
for (long wid : selectedWorkers) {
Collection<Long> existingIdList = index.getReplicaShardList(wid);
List<Long> todoIdList = validTargetShardIdList.stream()
.filter(x -> !existingIdList.contains(x))
.collect(Collectors.toList());
if (todoIdList.isEmpty()) {
continue;
}
try {
// Add the new replicas and wait for the result.
// NOTE: this is a batch add operation that does not check each shard's current replica count,
// so as a side effect it also brings under-replicated shards back toward the expected count.
scheduler.scheduleAddToWorker(currentGroup.getServiceId(), todoIdList, wid);
} catch (StarException exception) {
LOG.info("Fail to schedule new replicas of shard:{} to worker:{}, error:", todoIdList, wid, exception);
return;
}
}
// Remove the selected workers from the list; replicas left on the remaining workers will be cleaned up.
workersWithReplicas.removeIf(selectedWorkers::contains);
for (long wid : workersWithReplicas) {
Collection<Long> todoList = index.getReplicaShardList(wid);
if (todoList.isEmpty()) {
continue;
}
// schedule remove jobs asynchronously.
LOG.info("Submit async task to remove shard replicas:{} from worker:{}", todoList, wid);
scheduler.scheduleAsyncRemoveFromWorker(currentGroup.getServiceId(), new ArrayList<>(todoList), wid);
}
}
private void balanceSpreadShardGroup(ShardManager shardManager, ShardGroup group) {
// (A SPREAD B) && (B SPREAD C) => (A SPREAD C), NO!
//
// Assume a new replica of a shard is to be added to the worker group:
// all candidate workers are scored from most preferred to least preferred.
// The balancer moves a few replicas from the least preferred workers to the most preferred workers.
ReplicaWorkerInvertIndex index = new ReplicaWorkerInvertIndex();
index.buildFrom(workerManager, shardManager, group);
for (long workerGroupId : index.getAllWorkerGroupIds()) {
WorkerGroup workerGroup = workerManager.getWorkerGroup(group.getServiceId(), workerGroupId);
if (workerGroup == null) {
continue;
}
List<Long> workerIds = workerGroup.getAllWorkerIds(true);
if (workerIds.size() < 2) {
LOG.info("worker group:{} only has {} alive workers. Skip the balance check.",
workerGroupId, workerIds.size());
continue;
}
ScheduleScorer scorer = new ScheduleScorer(workerIds);
for (long workerId : workerIds) {
int numReplicas = index.getReplicaShardList(workerId).size();
if (numReplicas == 0) {
continue;
}
scorer.apply(group.getPlacementPolicy(), Collections.nCopies(numReplicas, workerId));
}
scorer.apply(workerManager);
List<Map.Entry<Long, Double>> sortedEntries = scorer.getScores().entrySet().stream()
.sorted(Map.Entry.comparingByValue(Comparator.naturalOrder()))
.collect(Collectors.toList());
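// Example (illustrative, assuming Config.SCHEDULER_BALANCE_MAX_SKEW = 1 and scores here ordered by
// replica count): with per-worker replica counts w1=4, w2=2, w3=1 for this group, w1 (lowest score)
// is paired with w3 (highest score); since 4 > 1 + 1, one replica that w3 does not already host is
// moved from w1 to w3.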
int head = 0;
int tail = sortedEntries.size() - 1;
while (head < tail) {
// srcWorkerId: least score, targetWorkerId: most score, move from srcWorkerId -> targetWorkerId
long srcWorkerId = sortedEntries.get(head).getKey();
long targetWorkerId = sortedEntries.get(tail).getKey();
Collection<Long> srcList = index.getReplicaShardList(srcWorkerId);
Collection<Long> tgtList = index.getReplicaShardList(targetWorkerId);
// TODO: sortedEntries is not strictly sorted by replica numbers of this shard group.
// It is possible that a worker with more replicas of this shard group is ahead of
// a worker with fewer replicas of this shard group due to its load and total replicas.
if (srcList.size() <= tgtList.size() + Config.SCHEDULER_BALANCE_MAX_SKEW) {
break;
}
// Select a shard from `srcList` and migrate it to the target worker.
List<Shard> candidates = srcList.stream()
.filter(x -> !tgtList.contains(x)) // doesn't have replica in target worker
.map(shardManager::getShard)
.filter(y -> y != null &&
!hasPrecedenceShardGroup(shardManager, group.getPlacementPolicy(), y.getGroupIds()))
.collect(Collectors.toList());
if (!candidates.isEmpty()) {
// TODO: randomly choose one?
// Or prefer the shard with more replicas,
// or skip shards that do not yet have the expected number of replicas in this round.
Shard selected = candidates.get(0);
try {
LOG.info("Try to balance shard:{} (service:{}) replica from worker:{} => worker:{}",
selected.getShardId(), selected.getServiceId(), srcWorkerId, targetWorkerId);
scheduler.scheduleAddToWorker(selected.getServiceId(), selected.getShardId(), targetWorkerId);
// only submit the asyncRemoveFromWorker if AddToWorker succeeds.
scheduler.scheduleAsyncRemoveFromWorker(
selected.getServiceId(), Collections.nCopies(1, selected.getShardId()), srcWorkerId);
} catch (Exception exception) {
LOG.info("Fail to balance shard:{} in service:{}, form worker:{} to worker:{}. Error:",
selected.getShardId(), selected.getServiceId(), srcWorkerId, targetWorkerId, exception);
}
} else {
// TODO: if there are no candidates, look into PACK groups and check whether any PACK group
// can be migrated as a whole.
}
++head;
--tail;
}
}
}
/**
* Determine whether any shard group in the given shard group id list has a higher precedence than the given policy.
*
* @param shardManager shard manager, to get shard group by id
* @param policy baseline policy
* @param groupIds the list of shard groups to be compared.
* @return true if any shard group has a higher precedence.
*/
private boolean hasPrecedenceShardGroup(ShardManager shardManager, PlacementPolicy policy, List<Long> groupIds) {
for (long id : groupIds) {
ShardGroup group = shardManager.getShardGroup(id);
if (group == null) {
continue;
}
if (group.getPlacementPolicy().getNumber() > policy.getNumber() && group.getShardIds().size() > 1) {
return true;
}
}
return false;
}
}