All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.grpc.xds.RingHashLoadBalancer Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2021 The gRPC Authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.grpc.xds;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static io.grpc.ConnectivityState.CONNECTING;
import static io.grpc.ConnectivityState.IDLE;
import static io.grpc.ConnectivityState.READY;
import static io.grpc.ConnectivityState.SHUTDOWN;
import static io.grpc.ConnectivityState.TRANSIENT_FAILURE;

import com.google.common.base.MoreObjects;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import com.google.common.primitives.UnsignedInteger;
import io.grpc.Attributes;
import io.grpc.ConnectivityState;
import io.grpc.EquivalentAddressGroup;
import io.grpc.InternalLogId;
import io.grpc.LoadBalancer;
import io.grpc.Status;
import io.grpc.SynchronizationContext;
import io.grpc.util.MultiChildLoadBalancer;
import io.grpc.xds.client.XdsLogger;
import io.grpc.xds.client.XdsLogger.XdsLogLevel;
import java.net.SocketAddress;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nullable;

/**
 * A {@link LoadBalancer} that provides consistent hashing based load balancing to upstream hosts.
 * It implements the "Ketama" hashing that maps hosts onto a circle (the "ring") by hashing its
 * addresses. Each request is routed to a host by hashing some property of the request and finding
 * the nearest corresponding host clockwise around the ring. Each host is placed on the ring some
 * number of times proportional to its weight. With the ring partitioned appropriately, the
 * addition or removal of one host from a set of N hosts will affect only 1/N requests.
 */
final class RingHashLoadBalancer extends MultiChildLoadBalancer {
  private static final Status RPC_HASH_NOT_FOUND =
      Status.INTERNAL.withDescription("RPC hash not found. Probably a bug because xds resolver"
          + " config selector always generates a hash.");
  private static final XxHash64 hashFunc = XxHash64.INSTANCE;

  private final LoadBalancer.Factory lazyLbFactory =
      new LazyLoadBalancer.Factory(pickFirstLbProvider);
  private final XdsLogger logger;
  private final SynchronizationContext syncContext;
  private List ring;

  RingHashLoadBalancer(Helper helper) {
    super(helper);
    syncContext = checkNotNull(helper.getSynchronizationContext(), "syncContext");
    logger = XdsLogger.withLogId(InternalLogId.allocate("ring_hash_lb", helper.getAuthority()));
    logger.log(XdsLogLevel.INFO, "Created");
  }

  @Override
  public Status acceptResolvedAddresses(ResolvedAddresses resolvedAddresses) {
    logger.log(XdsLogLevel.DEBUG, "Received resolution result: {0}", resolvedAddresses);
    List addrList = resolvedAddresses.getAddresses();
    Status addressValidityStatus = validateAddrList(addrList);
    if (!addressValidityStatus.isOk()) {
      return addressValidityStatus;
    }

    try {
      resolvingAddresses = true;
      AcceptResolvedAddrRetVal acceptRetVal = acceptResolvedAddressesInternal(resolvedAddresses);
      if (!acceptRetVal.status.isOk()) {
        return acceptRetVal.status;
      }

      // Now do the ringhash specific logic with weights and building the ring
      RingHashConfig config = (RingHashConfig) resolvedAddresses.getLoadBalancingPolicyConfig();
      if (config == null) {
        throw new IllegalArgumentException("Missing RingHash configuration");
      }
      Map serverWeights = new HashMap<>();
      long totalWeight = 0L;
      for (EquivalentAddressGroup eag : addrList) {
        Long weight = eag.getAttributes().get(InternalXdsAttributes.ATTR_SERVER_WEIGHT);
        // Support two ways of server weighing: either multiple instances of the same address
        // or each address contains a per-address weight attribute. If a weight is not provided,
        // each occurrence of the address will be counted a weight value of one.
        if (weight == null) {
          weight = 1L;
        }
        totalWeight += weight;
        EquivalentAddressGroup addrKey = stripAttrs(eag);
        if (serverWeights.containsKey(addrKey)) {
          serverWeights.put(addrKey, serverWeights.get(addrKey) + weight);
        } else {
          serverWeights.put(addrKey, weight);
        }
      }
      // Calculate scale
      long minWeight = Collections.min(serverWeights.values());
      double normalizedMinWeight = (double) minWeight / totalWeight;
      // Scale up the number of hashes per host such that the least-weighted host gets a whole
      // number of hashes on the the ring. Other hosts might not end up with whole numbers, and
      // that's fine (the ring-building algorithm can handle this). This preserves the original
      // implementation's behavior: when weights aren't provided, all hosts should get an equal
      // number of hashes. In the case where this number exceeds the max_ring_size, it's scaled
      // back down to fit.
      double scale = Math.min(
          Math.ceil(normalizedMinWeight * config.minRingSize) / normalizedMinWeight,
          (double) config.maxRingSize);

      // Build the ring
      ring = buildRing(serverWeights, totalWeight, scale);

      // Must update channel picker before return so that new RPCs will not be routed to deleted
      // clusters and resolver can remove them in service config.
      updateOverallBalancingState();

      shutdownRemoved(acceptRetVal.removedChildren);
    } finally {
      this.resolvingAddresses = false;
    }

    return Status.OK;
  }


  /**
   * Updates the overall balancing state by aggregating the connectivity states of all subchannels.
   *
   * 

Aggregation rules (in order of dominance): *

    *
  1. If there is at least one subchannel in READY state, overall state is READY
  2. *
  3. If there are 2 or more subchannels in TRANSIENT_FAILURE, overall state is * TRANSIENT_FAILURE (to allow timely failover to another policy)
  4. *
  5. If there is at least one subchannel in CONNECTING state, overall state is * CONNECTING
  6. *
  7. If there is one subchannel in TRANSIENT_FAILURE state and there is * more than one subchannel, report CONNECTING
  8. *
  9. If there is at least one subchannel in IDLE state, overall state is IDLE
  10. *
  11. Otherwise, overall state is TRANSIENT_FAILURE
  12. *
*/ @Override protected void updateOverallBalancingState() { checkState(!getChildLbStates().isEmpty(), "no subchannel has been created"); if (this.currentConnectivityState == SHUTDOWN) { // Ignore changes that happen after shutdown is called logger.log(XdsLogLevel.DEBUG, "UpdateOverallBalancingState called after shutdown"); return; } // Calculate the current overall state to report int numIdle = 0; int numReady = 0; int numConnecting = 0; int numTF = 0; forloop: for (ChildLbState childLbState : getChildLbStates()) { ConnectivityState state = childLbState.getCurrentState(); switch (state) { case READY: numReady++; break forloop; case CONNECTING: numConnecting++; break; case IDLE: numIdle++; break; case TRANSIENT_FAILURE: numTF++; break; default: // ignore it } } ConnectivityState overallState; if (numReady > 0) { overallState = READY; } else if (numTF >= 2) { overallState = TRANSIENT_FAILURE; } else if (numConnecting > 0) { overallState = CONNECTING; } else if (numTF == 1 && getChildLbStates().size() > 1) { overallState = CONNECTING; } else if (numIdle > 0) { overallState = IDLE; } else { overallState = TRANSIENT_FAILURE; } RingHashPicker picker = new RingHashPicker(syncContext, ring, getChildLbStates()); getHelper().updateBalancingState(overallState, picker); this.currentConnectivityState = overallState; } @Override protected ChildLbState createChildLbState(Object key) { return new ChildLbState(key, lazyLbFactory); } private Status validateAddrList(List addrList) { if (addrList.isEmpty()) { Status unavailableStatus = Status.UNAVAILABLE.withDescription("Ring hash lb error: EDS " + "resolution was successful, but returned server addresses are empty."); handleNameResolutionError(unavailableStatus); return unavailableStatus; } String dupAddrString = validateNoDuplicateAddresses(addrList); if (dupAddrString != null) { Status unavailableStatus = Status.UNAVAILABLE.withDescription("Ring hash lb error: EDS " + "resolution was successful, but there were duplicate addresses: " + dupAddrString); handleNameResolutionError(unavailableStatus); return unavailableStatus; } long totalWeight = 0; for (EquivalentAddressGroup eag : addrList) { Long weight = eag.getAttributes().get(InternalXdsAttributes.ATTR_SERVER_WEIGHT); if (weight == null) { weight = 1L; } if (weight < 0) { Status unavailableStatus = Status.UNAVAILABLE.withDescription( String.format("Ring hash lb error: EDS resolution was successful, but returned a " + "negative weight for %s.", stripAttrs(eag))); handleNameResolutionError(unavailableStatus); return unavailableStatus; } if (weight > UnsignedInteger.MAX_VALUE.longValue()) { Status unavailableStatus = Status.UNAVAILABLE.withDescription( String.format("Ring hash lb error: EDS resolution was successful, but returned a weight" + " too large to fit in an unsigned int for %s.", stripAttrs(eag))); handleNameResolutionError(unavailableStatus); return unavailableStatus; } totalWeight += weight; } if (totalWeight > UnsignedInteger.MAX_VALUE.longValue()) { Status unavailableStatus = Status.UNAVAILABLE.withDescription( String.format( "Ring hash lb error: EDS resolution was successful, but returned a sum of weights too" + " large to fit in an unsigned int (%d).", totalWeight)); handleNameResolutionError(unavailableStatus); return unavailableStatus; } return Status.OK; } @Nullable private String validateNoDuplicateAddresses(List addrList) { Set addresses = new HashSet<>(); Multiset dups = HashMultiset.create(); for (EquivalentAddressGroup eag : addrList) { for (SocketAddress address : eag.getAddresses()) { if (!addresses.add(address)) { dups.add(address.toString()); } } } if (!dups.isEmpty()) { return dups.entrySet().stream() .map((dup) -> String.format("Address: %s, count: %d", dup.getElement(), dup.getCount() + 1)) .collect(Collectors.joining("; ")); } return null; } private static List buildRing( Map serverWeights, long totalWeight, double scale) { List ring = new ArrayList<>(); double currentHashes = 0.0; double targetHashes = 0.0; for (Map.Entry entry : serverWeights.entrySet()) { Endpoint endpoint = new Endpoint(entry.getKey()); double normalizedWeight = (double) entry.getValue() / totalWeight; // Per GRFC A61 use the first address for the hash StringBuilder sb = new StringBuilder(entry.getKey().getAddresses().get(0).toString()); sb.append('_'); int lengthWithoutCounter = sb.length(); targetHashes += scale * normalizedWeight; long i = 0L; while (currentHashes < targetHashes) { sb.append(i); long hash = hashFunc.hashAsciiString(sb.toString()); ring.add(new RingEntry(hash, endpoint)); i++; currentHashes++; sb.setLength(lengthWithoutCounter); } } Collections.sort(ring); return Collections.unmodifiableList(ring); } @SuppressWarnings("ReferenceEquality") public static EquivalentAddressGroup stripAttrs(EquivalentAddressGroup eag) { if (eag.getAttributes() == Attributes.EMPTY) { return eag; } return new EquivalentAddressGroup(eag.getAddresses()); } private static final class RingHashPicker extends SubchannelPicker { private final SynchronizationContext syncContext; private final List ring; // Avoid synchronization between pickSubchannel and subchannel's connectivity state change, // freeze picker's view of subchannel's connectivity state. // TODO(chengyuanzhang): can be more performance-friendly with // IdentityHashMap and RingEntry contains Subchannel. private final Map pickableSubchannels; // read-only private RingHashPicker( SynchronizationContext syncContext, List ring, Collection children) { this.syncContext = syncContext; this.ring = ring; pickableSubchannels = new HashMap<>(children.size()); for (ChildLbState childLbState : children) { pickableSubchannels.put((Endpoint)childLbState.getKey(), new SubchannelView(childLbState, childLbState.getCurrentState())); } } // Find the ring entry with hash next to (clockwise) the RPC's hash (binary search). private int getTargetIndex(Long requestHash) { if (ring.size() <= 1) { return 0; } int low = 0; int high = ring.size() - 1; int mid = (low + high) / 2; do { long midVal = ring.get(mid).hash; long midValL = mid == 0 ? 0 : ring.get(mid - 1).hash; if (requestHash <= midVal && requestHash > midValL) { break; } if (midVal < requestHash) { low = mid + 1; } else { high = mid - 1; } mid = (low + high) / 2; } while (mid < ring.size() && low <= high); return mid; } @Override public PickResult pickSubchannel(PickSubchannelArgs args) { Long requestHash = args.getCallOptions().getOption(XdsNameResolver.RPC_HASH_KEY); if (requestHash == null) { return PickResult.withError(RPC_HASH_NOT_FOUND); } int targetIndex = getTargetIndex(requestHash); // Per gRFC A61, because of sticky-TF with PickFirst's auto reconnect on TF, we ignore // all TF subchannels and find the first ring entry in READY, CONNECTING or IDLE. If // CONNECTING or IDLE we return a pick with no results. Additionally, if that entry is in // IDLE, we initiate a connection. for (int i = 0; i < ring.size(); i++) { int index = (targetIndex + i) % ring.size(); SubchannelView subchannelView = pickableSubchannels.get(ring.get(index).addrKey); ChildLbState childLbState = subchannelView.childLbState; if (subchannelView.connectivityState == READY) { return childLbState.getCurrentPicker().pickSubchannel(args); } // RPCs can be buffered if the next subchannel is pending (per A62). Otherwise, RPCs // are failed unless there is a READY connection. if (subchannelView.connectivityState == CONNECTING) { return PickResult.withNoResult(); } if (subchannelView.connectivityState == IDLE) { syncContext.execute(() -> { childLbState.getLb().requestConnection(); }); return PickResult.withNoResult(); // Indicates that this should be retried after backoff } } // return the pick from the original subchannel hit by hash, which is probably an error ChildLbState originalSubchannel = pickableSubchannels.get(ring.get(targetIndex).addrKey).childLbState; return originalSubchannel.getCurrentPicker().pickSubchannel(args); } } /** * An unmodifiable view of a subchannel with state not subject to its real connectivity * state changes. */ private static final class SubchannelView { private final ChildLbState childLbState; private final ConnectivityState connectivityState; private SubchannelView(ChildLbState childLbState, ConnectivityState state) { this.childLbState = childLbState; this.connectivityState = state; } } private static final class RingEntry implements Comparable { private final long hash; private final Endpoint addrKey; private RingEntry(long hash, Endpoint addrKey) { this.hash = hash; this.addrKey = addrKey; } @Override public int compareTo(RingEntry entry) { return Long.compare(hash, entry.hash); } } /** * Configures the ring property. The larger the ring is (that is, the more hashes there are * for each provided host) the better the request distribution will reflect the desired weights. */ static final class RingHashConfig { final long minRingSize; final long maxRingSize; RingHashConfig(long minRingSize, long maxRingSize) { checkArgument(minRingSize > 0, "minRingSize <= 0"); checkArgument(maxRingSize > 0, "maxRingSize <= 0"); checkArgument(minRingSize <= maxRingSize, "minRingSize > maxRingSize"); this.minRingSize = minRingSize; this.maxRingSize = maxRingSize; } @Override public String toString() { return MoreObjects.toStringHelper(this) .add("minRingSize", minRingSize) .add("maxRingSize", maxRingSize) .toString(); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy