All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pinot.broker.routing.instanceselector.ReplicaGroupInstanceSelector Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pinot.broker.routing.instanceselector;

import java.time.Clock;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.helix.store.zk.ZkHelixPropertyStore;
import org.apache.helix.zookeeper.datamodel.ZNRecord;
import org.apache.pinot.broker.routing.adaptiveserverselector.AdaptiveServerSelector;
import org.apache.pinot.common.metrics.BrokerMetrics;
import org.apache.pinot.common.utils.HashUtil;
import org.apache.pinot.common.utils.config.QueryOptionsUtils;


/**
 * Instance selector for replica-group routing strategy.
 * 

The selection algorithm will always evenly distribute the traffic to all replicas of each segment, and will select * the same index of the enabled instances for all segments with the same number of replicas. The algorithm is very * light-weight and will do best effort to select the least servers for the request. *

The algorithm relies on the mirror segment assignment from replica-group segment assignment strategy. With mirror * segment assignment, any server in one replica-group will always have a corresponding server in other replica-groups * that have the same segments assigned. For an example, if S1 is a server in replica-group 1, and it has mirror server * S2 in replica-group 2 and S3 in replica-group 3. All segments assigned to S1 will also be assigned to S2 and S3. In * stable scenario (external view matches ideal state), all segments assigned to S1 will have the same enabled instances * of [S1, S2, S3] sorted (in alphabetical order). If we always pick the same index of enabled instances for all * segments, only one of S1, S2, S3 will be picked, so it is guaranteed that we pick the least server instances for the * request (there is no guarantee on choosing servers from the same replica-group though). In transitioning/error * scenario (external view does not match ideal state), there is no guarantee on picking the least server instances, but * the traffic is guaranteed to be evenly distributed to all available instances to avoid overwhelming hotspot servers. *

If the query option NUM_REPLICA_GROUPS_TO_QUERY is provided, the servers to be picked will be from different * replica groups such that segments are evenly distributed amongst the provided value of NUM_REPLICA_GROUPS_TO_QUERY. * Thus in case of [S1, S2, S3] if NUM_REPLICA_GROUPS_TO_QUERY = 2, the ReplicaGroup S1 and ReplicaGroup S2 will be * selected such that half the segments will come from S1 and other half from S2. If NUM_REPLICA_GROUPS_TO_QUERY value * is much greater than available servers, then ReplicaGroupInstanceSelector will behave similar to * BalancedInstanceSelector. *

If AdaptiveServerSelection is enabled, a single snapshot of the server ranking is fetched. This ranking is * referenced to pick the best available server for each segment. The algorithm ends up picking the minimum number of * servers required to process a query because it references a single snapshot of the server rankings. Currently, * NUM_REPLICA_GROUPS_TO_QUERY is not supported is AdaptiveServerSelection is enabled. */ public class ReplicaGroupInstanceSelector extends BaseInstanceSelector { public ReplicaGroupInstanceSelector(String tableNameWithType, ZkHelixPropertyStore propertyStore, BrokerMetrics brokerMetrics, @Nullable AdaptiveServerSelector adaptiveServerSelector, Clock clock) { super(tableNameWithType, propertyStore, brokerMetrics, adaptiveServerSelector, clock); } @Override Map select(List segments, int requestId, SegmentStates segmentStates, Map queryOptions) { if (_adaptiveServerSelector != null) { // Adaptive Server Selection is enabled. List serverRankList = new ArrayList<>(); List candidateServers = fetchCandidateServersForQuery(segments, segmentStates); // Fetch serverRankList before looping through all the segments. This is important to make sure that we pick // the least amount of instances for a query by referring to a single snapshot of the rankings. List> serverRankListWithScores = _adaptiveServerSelector.fetchServerRankingsWithScores(candidateServers); for (Pair entry : serverRankListWithScores) { serverRankList.add(entry.getLeft()); } return selectServersUsingAdaptiveServerSelector(segments, requestId, segmentStates, serverRankList); } else { // Adaptive Server Selection is NOT enabled. return selectServersUsingRoundRobin(segments, requestId, segmentStates, queryOptions); } } private Map selectServersUsingRoundRobin(List segments, int requestId, SegmentStates segmentStates, Map queryOptions) { Map selectedServers = new HashMap<>(HashUtil.getHashMapCapacity(segments.size())); Integer numReplicaGroupsToQuery = QueryOptionsUtils.getNumReplicaGroupsToQuery(queryOptions); int numReplicaGroups = numReplicaGroupsToQuery == null ? 1 : numReplicaGroupsToQuery; int replicaOffset = 0; for (String segment : segments) { List candidates = segmentStates.getCandidates(segment); // NOTE: candidates can be null when there is no enabled instances for the segment, or the instance selector has // not been updated (we update all components for routing in sequence) if (candidates == null) { continue; } // Round robin selection. int numCandidates = candidates.size(); int instanceIdx = (requestId + replicaOffset) % numCandidates; SegmentInstanceCandidate selectedInstance = candidates.get(instanceIdx); // Only put online instance. // This can only be offline when it is a new segment. if (selectedInstance.isOnline()) { selectedServers.put(segment, selectedInstance.getInstance()); } if (numReplicaGroups > numCandidates) { numReplicaGroups = numCandidates; } replicaOffset = (replicaOffset + 1) % numReplicaGroups; } return selectedServers; } private Map selectServersUsingAdaptiveServerSelector(List segments, int requestId, SegmentStates segmentStates, List serverRankList) { Map selectedServers = new HashMap<>(HashUtil.getHashMapCapacity(segments.size())); for (String segment : segments) { // NOTE: candidates can be null when there is no enabled instances for the segment, or the instance selector has // not been updated (we update all components for routing in sequence) List candidates = segmentStates.getCandidates(segment); if (candidates == null) { continue; } // Round Robin. int numCandidates = candidates.size(); int instanceIdx = requestId % numCandidates; SegmentInstanceCandidate selectedInstance = candidates.get(instanceIdx); // Adaptive Server Selection // TODO: Support numReplicaGroupsToQuery with Adaptive Server Selection. if (!serverRankList.isEmpty()) { int minIdx = Integer.MAX_VALUE; for (SegmentInstanceCandidate candidate : candidates) { int idx = serverRankList.indexOf(candidate.getInstance()); if (idx == -1) { // Let's use the round-robin approach until stats for all servers are populated. selectedInstance = candidates.get(instanceIdx); break; } if (idx < minIdx) { minIdx = idx; selectedInstance = candidate; } } } // Only put online instance. // This can only be offline when it is a new segment. if (selectedInstance.isOnline()) { selectedServers.put(segment, selectedInstance.getInstance()); } } return selectedServers; } private List fetchCandidateServersForQuery(List segments, SegmentStates segmentStates) { Set candidateServers = new HashSet<>(); for (String segment : segments) { List candidates = segmentStates.getCandidates(segment); if (candidates == null) { continue; } for (SegmentInstanceCandidate candidate : candidates) { candidateServers.add(candidate.getInstance()); } } return new ArrayList<>(candidateServers); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy