/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.cluster.placement.plugins;
import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.solr.cluster.Cluster;
import org.apache.solr.cluster.Node;
import org.apache.solr.cluster.Replica;
import org.apache.solr.cluster.SolrCollection;
import org.apache.solr.cluster.placement.AttributeFetcher;
import org.apache.solr.cluster.placement.AttributeValues;
import org.apache.solr.cluster.placement.BalanceRequest;
import org.apache.solr.cluster.placement.DeleteCollectionRequest;
import org.apache.solr.cluster.placement.PlacementContext;
import org.apache.solr.cluster.placement.PlacementException;
import org.apache.solr.cluster.placement.PlacementModificationException;
import org.apache.solr.cluster.placement.PlacementPlugin;
import org.apache.solr.cluster.placement.PlacementPluginFactory;
import org.apache.solr.cluster.placement.ReplicaMetric;
import org.apache.solr.cluster.placement.ShardMetrics;
import org.apache.solr.cluster.placement.impl.NodeMetricImpl;
import org.apache.solr.cluster.placement.impl.ReplicaMetricImpl;
import org.apache.solr.common.util.CollectionUtil;
import org.apache.solr.common.util.StrUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This factory is instantiated by config from its class name. Using it is the only way to create
* instances of {@link AffinityPlacementPlugin}.
*
* <p>In order to configure this plugin to be used for placement decisions, the following {@code
* curl} command (or something equivalent) has to be executed once the cluster is already running,
* in order to set the appropriate Zookeeper stored configuration. Replace {@code localhost:8983}
* with the IP address and port of one of your servers.
*
* <pre>
* curl -X POST -H 'Content-type:application/json' -d '{
*   "add": {
*     "name": ".placement-plugin",
*     "class": "org.apache.solr.cluster.placement.plugins.AffinityPlacementFactory",
*     "config": {
*       "minimalFreeDiskGB": 10,
*       "prioritizedFreeDiskGB": 50
*     }
*   }
* }' http://localhost:8983/api/cluster/plugin
* </pre>
*
* <p>In order to delete the placement-plugin section (and to fall back to either Legacy or
* rule-based placement if configured for a collection), execute:
*
* <pre>
* curl -X POST -H 'Content-type:application/json' -d '{
*   "remove" : ".placement-plugin"
* }' http://localhost:8983/api/cluster/plugin
* </pre>
*
* <p>{@link AffinityPlacementPlugin} implements placing replicas in a way that replicates past
* Autoscaling configurations. That specification does the following:
*
* <ul>
*   <li>Spread replicas per shard as evenly as possible across multiple availability zones (given
*       by a sys prop),
*   <li>assign replicas based on replica type to specific kinds of nodes (another sys prop), and
*   <li>avoid having more than one replica per shard on the same node.
*   <li>Only after these constraints are satisfied does the plugin minimize cores per node or
*       disk usage.
* </ul>
*
* <p>This plugin achieves this by creating an {@link AffinityPlacementPlugin.AffinityNode} that
* weights nodes very high if they are unbalanced with respect to AvailabilityZone and
* SpreadDomain. See {@link AffinityPlacementPlugin.AffinityNode} for more information on how this
* weighting helps the plugin correctly place and balance replicas.
*
* <p>This code is a realistic placement computation, based on a few assumptions. The code is
* written in such a way as to make it relatively easy to adapt it to (somewhat) different
* assumptions. Additional configuration options could be introduced to allow configuration-based
* option selection as well.
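*
* <p>As an illustration (the system property names come from the {@code *_SYSPROP} constants in
* {@link AffinityPlacementConfig}; the values here are hypothetical), nodes advertise their
* availability zone, node type, and supported replica types at startup:
*
* <pre>
* bin/solr start -Davailability_zone=us-east-1a -Dnode_type=searcher -Dreplica_type=NRT,PULL
* </pre>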
*/
public class AffinityPlacementFactory implements PlacementPluginFactory<AffinityPlacementConfig> {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
AffinityPlacementConfig config = AffinityPlacementConfig.DEFAULT;
/**
* Empty public constructor, used to instantiate this factory. Using a factory pattern allows
* one-time costly operations to be done if needed, and only requires instantiating a
* default-constructor class by name, rather than calling a constructor with more parameters (as
* would be needed if the plugin class were instantiated directly without going through a
* factory).
*/
public AffinityPlacementFactory() {}
@Override
public PlacementPlugin createPluginInstance() {
config.validate();
return new AffinityPlacementPlugin(
config.minimalFreeDiskGB,
config.prioritizedFreeDiskGB,
config.withCollection,
config.withCollectionShards,
config.collectionNodeType,
config.spreadAcrossDomains);
}
@Override
public void configure(AffinityPlacementConfig cfg) {
Objects.requireNonNull(cfg, "configuration must never be null");
cfg.validate();
this.config = cfg;
}
@Override
public AffinityPlacementConfig getConfig() {
return config;
}
/**
* See {@link AffinityPlacementFactory} for instructions on how to configure a cluster to use this
* plugin and details on what the plugin does.
*/
public static class AffinityPlacementPlugin extends OrderedNodePlacementPlugin {
private final long minimalFreeDiskGB;
private final long prioritizedFreeDiskGB;
// primary to secondary (1:1)
private final Map<String, String> withCollections;
// same but shard-wise
private final Map<String, String> withCollectionShards;
// secondary to primary (1:N) + shard-wise primary (1:N)
private final Map<String, Set<String>> collocatedWith;
private final Map<String, Set<String>> nodeTypes;
private final boolean spreadAcrossDomains;
/**
* The factory has decoded the configuration for the plugin instance and passes it the
* parameters it needs.
*/
AffinityPlacementPlugin(
long minimalFreeDiskGB,
long prioritizedFreeDiskGB,
Map<String, String> withCollections,
Map<String, String> withCollectionShards,
Map<String, String> collectionNodeTypes,
boolean spreadAcrossDomains) {
this.minimalFreeDiskGB = minimalFreeDiskGB;
this.prioritizedFreeDiskGB = prioritizedFreeDiskGB;
Objects.requireNonNull(withCollections, "withCollections must not be null");
Objects.requireNonNull(collectionNodeTypes, "collectionNodeTypes must not be null");
Objects.requireNonNull(withCollectionShards, "withCollectionShards must not be null");
this.spreadAcrossDomains = spreadAcrossDomains;
this.withCollections = withCollections;
this.withCollectionShards = withCollectionShards;
Map<String, Set<String>> collocated = new HashMap<>();
// reverse both relations: shard-agnostic and shard-wise
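// e.g. (hypothetical names) withCollections = {books -> authors} and
// withCollectionShards = {reviews -> authors} invert to
// collocated = {authors -> [books, reviews]}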
List.of(this.withCollections, this.withCollectionShards)
.forEach(
direct ->
direct.forEach(
(primary, secondary) ->
collocated
.computeIfAbsent(secondary, s -> new HashSet<>())
.add(primary)));
this.collocatedWith = Collections.unmodifiableMap(collocated);
if (collectionNodeTypes.isEmpty()) {
nodeTypes = Map.of();
} else {
nodeTypes = new HashMap<>();
collectionNodeTypes.forEach(
(coll, typesString) -> {
List<String> types = StrUtils.splitSmart(typesString, ',', true);
if (!types.isEmpty()) {
nodeTypes.put(coll, new HashSet<>(types));
}
});
}
}
@Override
protected void verifyDeleteCollection(
DeleteCollectionRequest deleteCollectionRequest, PlacementContext placementContext)
throws PlacementModificationException {
Cluster cluster = placementContext.getCluster();
Set<String> collocatedCollections =
collocatedWith.getOrDefault(deleteCollectionRequest.getCollection().getName(), Set.of());
for (String primaryName : collocatedCollections) {
try {
if (cluster.getCollection(primaryName) != null) {
// still exists
throw new PlacementModificationException(
"collocated collection "
+ primaryName
+ " of "
+ deleteCollectionRequest.getCollection().getName()
+ " still present");
}
} catch (IOException e) {
throw new PlacementModificationException(
"failed to retrieve collocated collection information", e);
}
}
}
/**
* AffinityPlacementContext is used to share information across {@link AffinityNode} instances.
*
* <p>For instance, with SpreadDomains and AvailabilityZones, the weighting of a Node requires
* information on the contents of other Nodes. This class is how that information is shared.
*
* <p>One AffinityPlacementContext is used for each call to {@link
* #computePlacements(Collection, PlacementContext)} or {@link #computeBalancing(BalanceRequest,
* PlacementContext)}. The state of the context will be altered throughout the computation.
*/
private static final class AffinityPlacementContext {
private final Set<String> allSpreadDomains = new HashSet<>();
// collection -> shard -> spread of replicas over spread domains
private final Map<String, Map<String, ReplicaSpread>> spreadDomainUsage = new HashMap<>();
private final Set<String> allAvailabilityZones = new HashSet<>();
// collection -> shard -> replica type -> spread of replicas over availability zones
private final Map<String, Map<String, Map<Replica.ReplicaType, ReplicaSpread>>>
availabilityZoneUsage = new HashMap<>();
private boolean doSpreadAcrossDomains;
}
@Override
protected Map<Node, WeightedNode> getBaseWeightedNodes(
PlacementContext placementContext,
Set<Node> nodes,
Iterable<SolrCollection> relevantCollections,
boolean skipNodesWithErrors)
throws PlacementException {
// Fetch attributes for a superset of all nodes requested amongst the placementRequests
AttributeFetcher attributeFetcher = placementContext.getAttributeFetcher();
attributeFetcher
.requestNodeSystemProperty(AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP)
.requestNodeSystemProperty(AffinityPlacementConfig.NODE_TYPE_SYSPROP)
.requestNodeSystemProperty(AffinityPlacementConfig.REPLICA_TYPE_SYSPROP)
.requestNodeSystemProperty(AffinityPlacementConfig.SPREAD_DOMAIN_SYSPROP);
attributeFetcher
.requestNodeMetric(NodeMetricImpl.NUM_CORES)
.requestNodeMetric(NodeMetricImpl.FREE_DISK_GB);
Set<ReplicaMetric<?>> replicaMetrics = Set.of(ReplicaMetricImpl.INDEX_SIZE_GB);
Set<String> requestedCollections = new HashSet<>();
for (SolrCollection collection : relevantCollections) {
if (requestedCollections.add(collection.getName())) {
attributeFetcher.requestCollectionMetrics(collection, replicaMetrics);
}
}
attributeFetcher.fetchFrom(nodes);
final AttributeValues attrValues = attributeFetcher.fetchAttributes();
AffinityPlacementContext affinityPlacementContext = new AffinityPlacementContext();
affinityPlacementContext.doSpreadAcrossDomains = spreadAcrossDomains;
Map<Node, WeightedNode> affinityNodeMap = CollectionUtil.newHashMap(nodes.size());
for (Node node : nodes) {
AffinityNode affinityNode =
newNodeFromMetrics(node, attrValues, affinityPlacementContext, skipNodesWithErrors);
if (affinityNode != null) {
affinityNodeMap.put(node, affinityNode);
}
}
// If there are not multiple spreadDomains, then there is nothing to spread across
if (affinityPlacementContext.allSpreadDomains.size() < 2) {
affinityPlacementContext.doSpreadAcrossDomains = false;
}
return affinityNodeMap;
}
AffinityNode newNodeFromMetrics(
Node node,
AttributeValues attrValues,
AffinityPlacementContext affinityPlacementContext,
boolean skipNodesWithErrors)
throws PlacementException {
// Parse the comma-separated replica_type sysprop (e.g. "NRT,PULL"); invalid values are
// logged as warnings and skipped
Set<Replica.ReplicaType> supportedReplicaTypes =
attrValues.getSystemProperty(node, AffinityPlacementConfig.REPLICA_TYPE_SYSPROP).stream()
.flatMap(s -> Arrays.stream(s.split(",")))
.map(String::trim)
.map(s -> s.toUpperCase(Locale.ROOT))
.map(
s -> {
try {
return Replica.ReplicaType.valueOf(s);
} catch (IllegalArgumentException e) {
log.warn(
"Node {} has an invalid value for the {} systemProperty: {}",
node.getName(),
AffinityPlacementConfig.REPLICA_TYPE_SYSPROP,
s);
return null;
}
})
.collect(Collectors.toSet());
if (supportedReplicaTypes.isEmpty()) {
// If the property is not defined on a node, or contains only whitespace, assume the node
// can accept any replica type
supportedReplicaTypes = Set.of(Replica.ReplicaType.values());
}
Set<String> nodeType;
Optional<String> nodePropOpt =
attrValues.getSystemProperty(node, AffinityPlacementConfig.NODE_TYPE_SYSPROP);
if (nodePropOpt.isEmpty()) {
nodeType = Collections.emptySet();
} else {
nodeType = new HashSet<>(StrUtils.splitSmart(nodePropOpt.get(), ','));
}
Optional<Double> nodeFreeDiskGB = attrValues.getNodeMetric(node, NodeMetricImpl.FREE_DISK_GB);
Optional<Integer> nodeNumCores = attrValues.getNodeMetric(node, NodeMetricImpl.NUM_CORES);
String az =
attrValues
.getSystemProperty(node, AffinityPlacementConfig.AVAILABILITY_ZONE_SYSPROP)
.orElse(AffinityPlacementConfig.UNDEFINED_AVAILABILITY_ZONE);
affinityPlacementContext.allAvailabilityZones.add(az);
String spreadDomain;
if (affinityPlacementContext.doSpreadAcrossDomains) {
spreadDomain =
attrValues
.getSystemProperty(node, AffinityPlacementConfig.SPREAD_DOMAIN_SYSPROP)
.orElse(null);
if (spreadDomain == null) {
if (log.isWarnEnabled()) {
log.warn(
"AffinityPlacementPlugin configured to spread across domains, but node {} does not have the {} system property. Ignoring spreadAcrossDomains.",
node.getName(),
AffinityPlacementConfig.SPREAD_DOMAIN_SYSPROP);
}
// Stop using spreadDomains for the rest of this computation, because we found a node
// without a spread domain.
affinityPlacementContext.doSpreadAcrossDomains = false;
affinityPlacementContext.allSpreadDomains.clear();
} else {
affinityPlacementContext.allSpreadDomains.add(spreadDomain);
}
} else {
spreadDomain = null;
}
if (nodeFreeDiskGB.isEmpty() && skipNodesWithErrors) {
if (log.isWarnEnabled()) {
log.warn(
"Unknown free disk on node {}, excluding it from placement decisions.",
node.getName());
}
return null;
} else if (nodeNumCores.isEmpty() && skipNodesWithErrors) {
if (log.isWarnEnabled()) {
log.warn(
"Unknown number of cores on node {}, excluding it from placement decisions.",
node.getName());
}
return null;
} else {
return new AffinityNode(
node,
attrValues,
affinityPlacementContext,
supportedReplicaTypes,
nodeType,
nodeNumCores.orElse(0),
nodeFreeDiskGB.orElse(0D),
az,
spreadDomain);
}
}
/**
* This implementation weights nodes in order to achieve balancing across AvailabilityZones and
* SpreadDomains, while trying to minimize the number of replicas on a node and to ensure a given
* amount of free disk space per node. This implementation also supports limiting the placement
* of certain replica types per node and co-locating collections.
*
* <p>The total weight of the AffinityNode is the sum of:
*
* <ul>
*   <li>The number of replicas on the node
*   <li>100 if the free disk space on the node &lt; prioritizedFreeDiskGB, otherwise 0
*   <li>If SpreadDomains are used:<br>
*       10,000 * the sum over each collection/shard of:
*       <ul>
*         <li>(# of replicas in this node's spread domain - the minimum spreadDomain's
*             replicaCount)^2
*         <li>These are individually squared to penalize higher values when summing up all values
*       </ul>
*   <li>If AvailabilityZones are used:<br>
*       1,000,000 * the sum over each collection/shard/replicaType of:
*       <ul>
*         <li>(# of replicas in this node's AZ - the minimum AZ's replicaCount)^2
*         <li>These are individually squared to penalize higher values when summing up all values
*       </ul>
* </ul>
*
* <p>The weighting here ensures that the order of importance for nodes is:
*
* <ol>
*   <li>Spread replicas of the same shard/replicaType across availabilityZones
*   <li>Spread replicas of the same shard across spreadDomains
*   <li>Make sure that replicas are not placed on nodes that have &lt; prioritizedFreeDiskGB disk
*       space available
*   <li>Minimize the number of replicas on the node
* </ol>
*
* <p>The "relevant" weight with a replica is the sum of:
*
* <ul>
*   <li>The number of replicas on the node
*   <li>100 if the projected free disk space on the node &lt; prioritizedFreeDiskGB, otherwise 0
*   <li>If SpreadDomains are used:<br>
*       10,000 * (# of replicas for the replica's shard in this node's spread domain - the
*       minimum spreadDomain's replicaCount)
*   <li>If AvailabilityZones are used:<br>
*       1,000,000 * (# of replicas for the replica's shard &amp; replicaType in this node's AZ -
*       the minimum AZ's replicaCount)
* </ul>
*
* <p>Multiple replicas of the same shard are not permitted to live on the same Node.
*
* <p>Users can specify withCollection to ensure that co-placement of replicas is maintained when
* computing new replica placements or balancing replicas.
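*
* <p>A worked example with hypothetical numbers: a node hosting 7 cores, with free disk below
* prioritizedFreeDiskGB, one replica over the minimum in its spread domain for a single
* collection/shard, and a balanced AZ layout weighs 7 + 100 + 10,000 * 1^2 + 1,000,000 * 0 =
* 10,107.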
*/
private class AffinityNode extends WeightedNode {
private final AttributeValues attrValues;
private final AffinityPlacementContext affinityPlacementContext;
private final Set<Replica.ReplicaType> supportedReplicaTypes;
private final Set<String> nodeType;
private int coresOnNode;
private double nodeFreeDiskGB;
private final String availabilityZone;
private final String spreadDomain;
AffinityNode(
Node node,
AttributeValues attrValues,
AffinityPlacementContext affinityPlacementContext,
Set<Replica.ReplicaType> supportedReplicaTypes,
Set<String> nodeType,
int coresOnNode,
double nodeFreeDiskGB,
String az,
String spreadDomain) {
super(node);
this.attrValues = attrValues;
this.affinityPlacementContext = affinityPlacementContext;
this.supportedReplicaTypes = supportedReplicaTypes;
this.nodeType = nodeType;
this.coresOnNode = coresOnNode;
this.nodeFreeDiskGB = nodeFreeDiskGB;
this.availabilityZone = az;
this.spreadDomain = spreadDomain;
}
@Override
public int calcWeight() {
return coresOnNode
// Only add 100 if prioritizedFreeDiskGB was provided and the node's freeDisk is lower
// than it
+ 100 * (prioritizedFreeDiskGB > 0 && nodeFreeDiskGB < prioritizedFreeDiskGB ? 1 : 0)
+ 10000 * getSpreadDomainWeight()
+ 1000000 * getAZWeight();
}
@Override
public int calcRelevantWeightWithReplica(Replica replica) {
return coresOnNode
// Only add 100 if prioritizedFreeDiskGB was provided and the node's projected freeDisk
// is lower than it
+ 100
* (prioritizedFreeDiskGB > 0
&& nodeFreeDiskGB - getProjectedSizeOfReplica(replica)
< prioritizedFreeDiskGB
? 1
: 0)
+ 10000 * projectReplicaSpreadWeight(replica)
+ 1000000 * projectAZWeight(replica);
}
@Override
public boolean canAddReplica(Replica replica) {
String collection = replica.getShard().getCollection().getName();
// By default, do not allow two replicas of the same shard on a node
return super.canAddReplica(replica)
// Filter out unsupported replica types
&& supportedReplicaTypes.contains(replica.getType())
// Filter out unsupported node types
&& Optional.ofNullable(nodeTypes.get(collection))
.map(s -> s.stream().anyMatch(nodeType::contains))
.orElse(true)
// Ensure any co-located collections already exist on the Node
&& Optional.ofNullable(withCollections.get(collection))
.map(this::hasCollectionOnNode)
.orElse(true)
// Ensure same shard is collocated if required
&& Optional.ofNullable(withCollectionShards.get(collection))
.map(
shardWiseOf ->
getShardsOnNode(shardWiseOf).contains(replica.getShard().getShardName()))
.orElse(true)
// Ensure the disk space will not go below the minimum if the replica is added
&& (minimalFreeDiskGB <= 0
|| nodeFreeDiskGB - getProjectedSizeOfReplica(replica) > minimalFreeDiskGB);
}
/**
* Return any replicas that cannot be removed because there are collocated collections that
* require the replica to exist.
*
* @param replicas the replicas to remove
* @return any errors for replicas that cannot be removed
*/
@Override
public Map<Replica, String> canRemoveReplicas(Collection<Replica> replicas) {
Map<Replica, String> replicaRemovalExceptions = new HashMap<>();
Map<String, Map<String, Set<Replica>>> removals = new HashMap<>();
for (Replica replica : replicas) {
SolrCollection collection = replica.getShard().getCollection();
Set<String> collocatedCollections = new HashSet<>();
Optional.ofNullable(collocatedWith.get(collection.getName()))
.ifPresent(collocatedCollections::addAll);
collocatedCollections.retainAll(getCollectionsOnNode());
if (collocatedCollections.isEmpty()) {
continue;
}
Stream<String> shardWiseCollocations =
collocatedCollections.stream()
.filter(
priColl -> collection.getName().equals(withCollectionShards.get(priColl)));
final Set<String> mandatoryShardsOrAll =
shardWiseCollocations
.flatMap(priColl -> getShardsOnNode(priColl).stream())
.collect(Collectors.toSet());
// There are collocatedCollections for this shard, so make sure at least one replica of
// this shard remains on the node after the removal
Set<Replica> replicasRemovedForShard =
removals
.computeIfAbsent(
replica.getShard().getCollection().getName(), k -> new HashMap<>())
.computeIfAbsent(replica.getShard().getShardName(), k -> new HashSet<>());
replicasRemovedForShard.add(replica);
// either if all shards are mandatory, or the current one is mandatory
boolean shardWise = false;
if (mandatoryShardsOrAll.isEmpty()
|| (shardWise = mandatoryShardsOrAll.contains(replica.getShard().getShardName()))) {
if (replicasRemovedForShard.size()
>= getReplicasForShardOnNode(replica.getShard()).size()) {
replicaRemovalExceptions.put(
replica,
"co-located with replicas of "
+ (shardWise ? replica.getShard().getShardName() + " of " : "")
+ collocatedCollections);
}
}
}
return replicaRemovalExceptions;
}
@Override
protected boolean addProjectedReplicaWeights(Replica replica) {
nodeFreeDiskGB -= getProjectedSizeOfReplica(replica);
coresOnNode += 1;
return addReplicaToAzAndSpread(replica);
}
@Override
protected void initReplicaWeights(Replica replica) {
addReplicaToAzAndSpread(replica);
}
private boolean addReplicaToAzAndSpread(Replica replica) {
boolean needsResort = false;
// Only use AvailabilityZones if there are more than 1
if (affinityPlacementContext.allAvailabilityZones.size() > 1) {
needsResort |=
affinityPlacementContext
.availabilityZoneUsage
.computeIfAbsent(
replica.getShard().getCollection().getName(), k -> new HashMap<>())
.computeIfAbsent(replica.getShard().getShardName(), k -> new HashMap<>())
.computeIfAbsent(
replica.getType(),
k -> new ReplicaSpread(affinityPlacementContext.allAvailabilityZones))
.addReplica(availabilityZone);
}
// Only use SpreadDomains if they have been provided to all nodes and there are more than 1
if (affinityPlacementContext.doSpreadAcrossDomains) {
needsResort |=
affinityPlacementContext
.spreadDomainUsage
.computeIfAbsent(
replica.getShard().getCollection().getName(), k -> new HashMap<>())
.computeIfAbsent(
replica.getShard().getShardName(),
k -> new ReplicaSpread(affinityPlacementContext.allSpreadDomains))
.addReplica(spreadDomain);
}
return needsResort;
}
@Override
protected void removeProjectedReplicaWeights(Replica replica) {
nodeFreeDiskGB += getProjectedSizeOfReplica(replica);
coresOnNode -= 1;
// Only use AvailabilityZones if there are more than 1
if (affinityPlacementContext.allAvailabilityZones.size() > 1) {
Optional.ofNullable(
affinityPlacementContext.availabilityZoneUsage.get(
replica.getShard().getCollection().getName()))
.map(m -> m.get(replica.getShard().getShardName()))
.map(m -> m.get(replica.getType()))
.ifPresent(m -> m.removeReplica(availabilityZone));
}
// Only use SpreadDomains if they have been provided to all nodes and there are more than 1
if (affinityPlacementContext.doSpreadAcrossDomains) {
Optional.ofNullable(
affinityPlacementContext.spreadDomainUsage.get(
replica.getShard().getCollection().getName()))
.map(m -> m.get(replica.getShard().getShardName()))
.ifPresent(m -> m.removeReplica(spreadDomain));
}
}
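/**
* Projected disk size of a replica, estimated from the INDEX_SIZE_GB metric of its shard
* leader; defaults to 0 when the metric is unavailable.
*/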
private double getProjectedSizeOfReplica(Replica replica) {
return attrValues
.getCollectionMetrics(replica.getShard().getCollection().getName())
.flatMap(colMetrics -> colMetrics.getShardMetrics(replica.getShard().getShardName()))
.flatMap(ShardMetrics::getLeaderMetrics)
.flatMap(lrm -> lrm.getReplicaMetric(ReplicaMetricImpl.INDEX_SIZE_GB))
.orElse(0D);
}
/**
* If more than one spread domain is given in the cluster, return a weight for this node based
* on the number of replicas in its spread domain.
*
* <p>For each Collection &amp; Shard, sum up the number of replicas this node's SpreadDomain
* has over the minimum SpreadDomain. Square each value before summing, so that a small number
* of large imbalances is penalized more than a large number of small ones.
*
* @return the weight
*/
private int getSpreadDomainWeight() {
if (affinityPlacementContext.doSpreadAcrossDomains) {
return affinityPlacementContext.spreadDomainUsage.values().stream()
.flatMap(m -> m.values().stream())
.mapToInt(rs -> rs.overMinimum(spreadDomain))
.map(i -> i * i)
.sum();
} else {
return 0;
}
}
/**
* If more than one spread domain is given in the cluster, return a projected SpreadDomain
* weight for this node and this replica.
*
* <p>For the new replica's Collection &amp; Shard, project the number of replicas this node's
* SpreadDomain would have over the minimum SpreadDomain.
*
* @return the weight
*/
private int projectReplicaSpreadWeight(Replica replica) {
if (replica != null && affinityPlacementContext.doSpreadAcrossDomains) {
return Optional.ofNullable(
affinityPlacementContext.spreadDomainUsage.get(
replica.getShard().getCollection().getName()))
.map(m -> m.get(replica.getShard().getShardName()))
.map(rs -> rs.projectOverMinimum(spreadDomain, 1))
.orElse(0);
} else {
return 0;
}
}
/**
* If more than one AvailabilityZone is given in the cluster, return a weight for this node
* based on the number of replicas in its availability zone.
*
* <p>For each Collection, Shard &amp; ReplicaType, sum up the number of replicas this node's
* AvailabilityZone has over the minimum AvailabilityZone. Square each value before summing, so
* that a small number of large imbalances is penalized more than a large number of small ones.
*
* @return the weight
*/
private int getAZWeight() {
if (affinityPlacementContext.allAvailabilityZones.size() < 2) {
return 0;
} else {
return affinityPlacementContext.availabilityZoneUsage.values().stream()
.flatMap(m -> m.values().stream())
.flatMap(m -> m.values().stream())
.mapToInt(rs -> rs.overMinimum(availabilityZone))
.map(i -> i * i)
.sum();
}
}
/**
* If more than one AvailabilityZone is given in the cluster, return a projected
* AvailabilityZone weight for this node and this replica.
*
* <p>For the new replica's Collection, Shard &amp; ReplicaType, project the number of replicas
* this node's AvailabilityZone would have over the minimum AvailabilityZone.
*
* @return the weight
*/
private int projectAZWeight(Replica replica) {
if (replica == null || affinityPlacementContext.allAvailabilityZones.size() < 2) {
return 0;
} else {
return Optional.ofNullable(
affinityPlacementContext.availabilityZoneUsage.get(
replica.getShard().getCollection().getName()))
.map(m -> m.get(replica.getShard().getShardName()))
.map(m -> m.get(replica.getType()))
.map(rs -> rs.projectOverMinimum(availabilityZone, 1))
.orElse(0);
}
}
}
private static class ReplicaSpread {
private final Set<String> allKeys;
private final Map<String, Integer> spread;
private int minReplicasLocated;
private ReplicaSpread(Set<String> allKeys) {
this.allKeys = allKeys;
this.spread = new HashMap<>();
this.minReplicasLocated = 0;
}
int overMinimum(String key) {
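// e.g. (hypothetical) spread = {zoneA=2, zoneB=1, zoneC=1} with minReplicasLocated = 1:
// overMinimum("zoneA") == 1 while overMinimum("zoneB") == 0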
return spread.getOrDefault(key, 0) - minReplicasLocated;
}
/**
* Project the {@link #overMinimum(String)} value as if {@code replicaDelta} replicas were
* added for the given spread key. Any replica added temporarily for the projection is removed
* again, so that the state is unchanged from when the method was called.
*/
int projectOverMinimum(String key, int replicaDelta) {
int overMinimum = overMinimum(key);
if (overMinimum == 0 && replicaDelta > 0) {
addReplica(key);
int projected = overMinimum(key);
removeReplica(key);
return projected;
} else {
return Integer.max(0, overMinimum + replicaDelta);
}
}
/**
* Add a replica for the given spread key, returning whether a full resorting is needed for
* AffinityNodes. Resorting is only needed if other nodes could possibly have a lower weight
* than before.
*
* @param key the spread key for the replica that should be added
* @return whether a re-sort is required
*/
boolean addReplica(String key) {
int previous = spread.getOrDefault(key, 0);
spread.put(key, previous + 1);
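// e.g. (hypothetical) allKeys = {A, B} and spread was {A=1}: adding a replica for B makes
// spread = {A=1, B=1}, raises minReplicasLocated from 0 to 1, and returns true, since nodes
// in A now sit less far over the minimum and may weigh less than before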
if (allKeys.size() > 0
&& spread.size() == allKeys.size()
&& previous == minReplicasLocated) {
minReplicasLocated = spread.values().stream().mapToInt(Integer::intValue).min().orElse(0);
return true;
}
return false;
}
void removeReplica(String key) {
Integer replicasLocated = spread.computeIfPresent(key, (k, v) -> v - 1 == 0 ? null : v - 1);
if (replicasLocated == null) {
replicasLocated = 0;
}
if (replicasLocated < minReplicasLocated) {
minReplicasLocated = replicasLocated;
}
}
}
}
}