
/*-
* Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
*
* This file was distributed by Oracle as part of a version of Oracle NoSQL
* Database made available at:
*
* http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
*
* Please see the LICENSE file included in the top-level directory of the
* appropriate version of Oracle NoSQL Database for a copy of the license and
* additional information.
*/
package oracle.kv.impl.admin.topo;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.SortedMap;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import oracle.kv.impl.admin.IllegalCommandException;
import oracle.kv.impl.admin.param.AdminParams;
import oracle.kv.impl.admin.param.ArbNodeParams;
import oracle.kv.impl.admin.param.Parameters;
import oracle.kv.impl.admin.param.RepNodeParams;
import oracle.kv.impl.admin.param.StorageNodeParams;
import oracle.kv.impl.admin.param.StorageNodePool;
import oracle.kv.impl.admin.topo.Validations.ANNotAllowedOnSN;
import oracle.kv.impl.admin.topo.Validations.ANProximity;
import oracle.kv.impl.admin.topo.Validations.ANWrongDC;
import oracle.kv.impl.admin.topo.Validations.BadAdmin;
import oracle.kv.impl.admin.topo.Validations.EmptyZone;
import oracle.kv.impl.admin.topo.Validations.ExcessANs;
import oracle.kv.impl.admin.topo.Validations.ExcessAdmins;
import oracle.kv.impl.admin.topo.Validations.ExcessRNs;
import oracle.kv.impl.admin.topo.Validations.HelperParameters;
import oracle.kv.impl.admin.topo.Validations.InsufficientANs;
import oracle.kv.impl.admin.topo.Validations.InsufficientAdmins;
import oracle.kv.impl.admin.topo.Validations.InsufficientRNs;
import oracle.kv.impl.admin.topo.Validations.MissingRootDirectorySize;
import oracle.kv.impl.admin.topo.Validations.MissingStorageDirectorySize;
import oracle.kv.impl.admin.topo.Validations.MissingAdminDirectorySize;
import oracle.kv.impl.admin.topo.Validations.MultipleRNsInRoot;
import oracle.kv.impl.admin.topo.Validations.NoPartition;
import oracle.kv.impl.admin.topo.Validations.NoPrimaryDC;
import oracle.kv.impl.admin.topo.Validations.NonOptimalNumPartitions;
import oracle.kv.impl.admin.topo.Validations.OverCapacity;
import oracle.kv.impl.admin.topo.Validations.RNHeapExceedsSNMemory;
import oracle.kv.impl.admin.topo.Validations.RNProximity;
import oracle.kv.impl.admin.topo.Validations.RulesProblem;
import oracle.kv.impl.admin.topo.Validations.StorageDirectorySizeImbalance;
import oracle.kv.impl.admin.topo.Validations.StorageNodeMissing;
import oracle.kv.impl.admin.topo.Validations.UnderCapacity;
import oracle.kv.impl.admin.topo.Validations.UnevenANDistribution;
import oracle.kv.impl.admin.topo.Validations.WrongAdminType;
import oracle.kv.impl.admin.topo.Validations.WrongNodeType;
import oracle.kv.impl.param.Parameter;
import oracle.kv.impl.param.ParameterMap;
import oracle.kv.impl.param.SizeParameter;
import oracle.kv.impl.topo.AdminType;
import oracle.kv.impl.topo.ArbNode;
import oracle.kv.impl.topo.ArbNodeId;
import oracle.kv.impl.topo.Datacenter;
import oracle.kv.impl.topo.DatacenterId;
import oracle.kv.impl.topo.DatacenterType;
import oracle.kv.impl.topo.Partition;
import oracle.kv.impl.topo.PartitionId;
import oracle.kv.impl.topo.RepGroup;
import oracle.kv.impl.topo.RepGroupId;
import oracle.kv.impl.topo.RepNode;
import oracle.kv.impl.topo.RepNodeId;
import oracle.kv.impl.topo.ResourceId;
import oracle.kv.impl.topo.StorageNode;
import oracle.kv.impl.topo.StorageNodeId;
import oracle.kv.impl.topo.Topology;
import com.sleepycat.je.rep.NodeType;
/**
* Topologies are governed by constraints on the relationships of their
* components and on their use of physical resources.
*
* Component relationship constraints:
* ----------------------------------
* Each datacenter must have a complete copy of the data.
* A shard should be tolerant of a single point of failure, and to facilitate
* that, each RN of a shard must be on a different SN
* The number of partitions is fixed for the lifetime of a store.
* Each shard must have an RN in an SN in a primary data center
*
* Physical resource constraints:
* ------------------------------
* Each SN can only host a maximum number of RNs
* Additional rules will be added about network affinity, physical fault groups,
* etc
*
* Inputs to a topology:
* DC Data center
* NS Network switch Optional
* SN Storage node Associated with a DC, possibly a NS
* RF Replication factor Per data center, associated with S
* P Partition Administrator # fixed for life of store
* C Capacity Administrator Per SN
* SD Storage directory Administrator Optional, per SN
* RD RN log directory Administrator Optional, per SN
* ADM Admin service Administrator
*
* Outputs (calculated by topology builder)
* S Shard (formally replication group)
* RN Replication node Associated with a S, assigned to a SN
* ARB Arbiter Associated with a S, assigned to a SN
*
* The following rules are used to enforce the constraints:
*
* 1. Each datacenter must have the same set of shards.
* 2. Each shard must have repFactor number of RNs
* 3. Each RN and arbiter of an S must reside on a different SN.
* 3a. If, and only if, the DC RF is 2, an ARB must be deployed for each S (on
* an SN different from the SNs hosting the shard's RNs).
* 4. The number of RNs hosted on an SN must be {@literal <=} the capacity (C) of that SN
* 5. Each shard must have an RN in an SN in a primary datacenter
* 6. There should be no empty zones.
* 7. Arbiters will be hosted only when the Primary RF = 2 and when there is
* a primary datacenter that allows arbiters.
* 8. Arbiters are hosted in a single primary datacenter that allows arbiters.
* 9. If a primary datacenter with RF = 0 that is configured to host arbiters
* exists, arbiters are hosted in that datacenter.
* 10. Arbiters will be hosted only in SNs that are configured to allow
* hosting Arbiters.
* 11. Each shard must have one arbiter when the Primary RF = 2 and there is
* a primary datacenter that allows arbiters.
* 12. The arbiter hosting primary datacenter must have at least RF + 1 SNs
* (excluding capacity = 0 SNs that don't allow arbiters) and at least
* one SN that can host arbiters.
*
* Warnings:
*
* An SN is under capacity.
* A store could have additional shards.
* A shard has no partitions.
*
*
* Not yet implemented:
* If NS are present, then depending on the network affinity mode,
* 1. each RN of an S must reside on the same NS or
* 2. each RN of an S must reside on different NSs.
*
* The decision to deploy an ARB is per-DC; therefore each S could have an ARB
* in each DC.
*
* There are two network affinity modes, a) optimize for performance, requiring
* that replication nodes of the same shard reside on the same switch, and b)
* optimize for availability, requiring that replication nodes of the same
* shard reside on different switches.
*/
public class Rules {
private static final int MAX_REPLICATION_FACTOR = 20;
/**
* Validation checks that apply to a single topology are:
* -Every datacenter that is present in the pool of SNs used to deploy the
* topology has a complete copy of the data and holds RF number of Admins
* -all shards house at least one partition
* -all shards have the datacenter-specified repfactor in each datacenter
* -all shards have their RNs on different nodes.
* -each SN is within capacity.
*/
public static Results validate(Topology topo,
Parameters params,
boolean topoIsDeployed) {
Results results = new Results();
/*
* Do some processing of the topology and parameters to map out
* relationships that are embodied in the topo and params, but are
* not stated in a convenient way for validation.
*/
/*
* Hang onto which shards are in which datacenters, and how many
* RNs are in each shard (by datacenter).
*
* Also sort RNs by SN, for memory size validation.
*/
Map<DatacenterId, Map<RepGroupId, Integer>> shardsByDC = new HashMap<>();
Map<StorageNodeId, Set<RepNodeId>> rnsPerSN = new HashMap<>();
preProcessTopology(topo, shardsByDC, rnsPerSN);
/* Do all DCs have the same set of shards? */
checkDcsShardSets(results, shardsByDC, topo);
/* Do all shards for a DC have the right repfactor number of RNs? */
checkShardsRepfactor(results, shardsByDC, topo);
/* All RNs of a single shard must be on different SNs. */
checkShardRNsStatus(results, topo);
/* Each shard must have an RN in an SN in a primary DC */
checkShardRNSNPrimaryDC(results, shardsByDC, topo);
/* Are all SNs still available in the current store? */
checkSNsAvailable(results, topo, params);
// TODO - this check is suspect since parameters may not be set - move
// to topoIsDeployed clause?
/* Are all SNs within capacity? */
checkSNCapacity(results, topo, params);
/* Check the storage directories */
checkStorageDirectories(results, topo, params);
if (topoIsDeployed) {
/* Check RN node type versus zone type */
checkRepNodeTypes(topo, params, results);
/* Check the Admins */
checkAdmins(topo, params, results);
/*
* Check environment placement.
*/
findMultipleRNsInRootDir(topo, params, results);
/*
* SN memoryMB and RN heap are optional parameters. If these have
* been specified, make sure that the sum of all RN JVM heaps does
* not exceed the memory specified on the SN.
*/
checkMemoryUsageOnSN(rnsPerSN, params, results);
checkPartitionLayout(topo, params, results);
/* Check topology helper host parameters */
checkTopoHelperRules(topo, results, params);
}
/* Check for empty zone.*/
checkEmptyZone(topo, results);
/* Check all the arbiter rules */
checkArbiterRules(topo, results, params);
return results;
}
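/*
* Illustrative usage sketch: run the single-topology checks against a
* deployed store. The topo, params and logger variables are assumed to be
* available from the surrounding admin context.
*
*   Rules.Results results = Rules.validate(topo, params, true);
*   if (results.numViolations() > 0) {
*       logger.warning(results.toString());
*   }
*/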
private static void preProcessTopology(Topology topo,
Map<DatacenterId, Map<RepGroupId, Integer>> shardsByDC,
Map<StorageNodeId, Set<RepNodeId>> rnsPerSN) {
for (Datacenter dc : topo.getSortedDatacenters()) {
/*
* RF=0 datacenters host only arbiters and no repnodes. Skip this
* datacenter. Arbiter rules will be checked separately later.
*/
if (dc.getRepFactor() == 0) {
continue;
}
shardsByDC.put(dc.getResourceId(),
new HashMap<RepGroupId, Integer>());
}
for (RepNode rn : topo.getSortedRepNodes()) {
StorageNodeId snId = rn.getStorageNodeId();
StorageNode hostSN = topo.get(snId);
DatacenterId dcId = hostSN.getDatacenterId();
RepGroupId rgId = rn.getRepGroupId();
Map<RepGroupId, Integer> shardCount = shardsByDC.get(dcId);
Integer count = shardCount.get(rgId);
if (count == null) {
shardCount.put(rgId, 1);
} else {
shardCount.put(rgId, count + 1);
}
Set<RepNodeId> rnIds = rnsPerSN.get(snId);
if (rnIds == null) {
rnIds = new HashSet<>();
rnsPerSN.put(snId, rnIds);
}
rnIds.add(rn.getResourceId());
}
}
private static void checkDcsShardSets(Results results,
Map<DatacenterId, Map<RepGroupId, Integer>> shardsByDC,
Topology topo) {
Set<RepGroupId> allShardIds = topo.getRepGroupIds();
for (Map.Entry<DatacenterId, Map<RepGroupId, Integer>> dcInfo :
shardsByDC.entrySet()) {
/*
* Find the shards that are in the topology, but are not in this
* datacenter.
*/
Set<RepGroupId> shards = dcInfo.getValue().keySet();
Set<RepGroupId> missing = relComplement(allShardIds, shards);
DatacenterId dcId = dcInfo.getKey();
int repFactor = topo.get(dcId).getRepFactor();
for (RepGroupId rgId : missing) {
results.add(new InsufficientRNs(dcId, repFactor,
rgId, repFactor));
}
}
}
private static void checkShardsRepfactor(Results results,
Map<DatacenterId, Map<RepGroupId, Integer>> shardsByDC,
Topology topo) {
for (Map.Entry<DatacenterId, Map<RepGroupId, Integer>> dcInfo :
shardsByDC.entrySet()) {
/* RepFactor is set per datacenter */
DatacenterId dcId = dcInfo.getKey();
int repFactor = topo.get(dcId).getRepFactor();
for (Map.Entry<RepGroupId, Integer> shardInfo :
dcInfo.getValue().entrySet()) {
int numRNs = shardInfo.getValue();
if (numRNs < repFactor) {
results.add(new InsufficientRNs(dcId,
repFactor,
shardInfo.getKey(),
repFactor - numRNs));
} else if (numRNs > repFactor) {
results.add(new ExcessRNs(dcId,
repFactor,
shardInfo.getKey(),
numRNs - repFactor));
}
}
}
}
private static void checkShardRNsStatus(Results results, Topology topo) {
for (RepGroupId rgId : topo.getRepGroupIds()) {
/* Organize RNIds by SNId */
Map<StorageNodeId, Set<RepNodeId>> rnIdBySNId = new HashMap<>();
for (RepNode rn : topo.get(rgId).getRepNodes()) {
StorageNodeId snId = rn.getStorageNodeId();
Set<RepNodeId> rnIds = rnIdBySNId.get(snId);
if (rnIds == null) {
rnIds = new HashSet<>();
rnIdBySNId.put(rn.getStorageNodeId(), rnIds);
}
rnIds.add(rn.getResourceId());
}
/*
* If any of the SNs used by this shard have more than one RN,
* report it.
*/
for (Map.Entry<StorageNodeId, Set<RepNodeId>> entry :
rnIdBySNId.entrySet()) {
if (entry.getValue().size() > 1) {
StorageNodeId snId = entry.getKey();
results.add(new RNProximity(snId, rgId,
new ArrayList<>(entry.getValue())));
}
}
}
}
private static void checkShardRNSNPrimaryDC(Results results,
Map<DatacenterId, Map<RepGroupId, Integer>> shardsByDC,
Topology topo) {
final Set<RepGroupId> shardsWithoutPrimaryDC = topo.getRepGroupIds();
for (final Map.Entry<DatacenterId, Map<RepGroupId, Integer>> dcInfo :
shardsByDC.entrySet()) {
final DatacenterId dcId = dcInfo.getKey();
final Datacenter dc = topo.get(dcId);
if (dc.getDatacenterType().isPrimary()) {
final Map<RepGroupId, Integer> repGroupInfo =
dcInfo.getValue();
shardsWithoutPrimaryDC.removeAll(repGroupInfo.keySet());
}
}
for (final RepGroupId rgId : shardsWithoutPrimaryDC) {
results.add(new NoPrimaryDC(rgId));
}
}
private static void checkSNsAvailable(Results results,
Topology topo,
Parameters params) {
for (StorageNodeId snId : topo.getStorageNodeIds()) {
final StorageNodeParams snParams = params.get(snId);
/*
* snParams may be null if the Storage Node does not exist in the
* current store; it might have been removed by the user earlier.
*/
if (snParams == null) {
results.add(new StorageNodeMissing(snId));
}
}
}
private static void checkSNCapacity(Results results,
Topology topo,
Parameters params) {
CapacityProfile profile = getCapacityProfile(topo, params);
for (RulesProblem p : profile.getProblems()) {
results.add(p);
}
/* Warn about under capacity SNs. */
for (RulesProblem w : profile.getWarnings()) {
results.add(w);
}
}
private static void checkStorageDirectories(Results results,
Topology topo, Parameters params) {
/*
* Check the storage directories for RN env, RN log
* and Admin env
*/
for (StorageNodeId snId : topo.getStorageNodeIds()) {
final StorageNodeParams snp = params.get(snId);
/*
* snp may be null if the Storage Node does not exist in the
* current store; it might have been removed by the user earlier.
*/
if (snp == null) {
continue;
}
final ParameterMap sdMap = snp.getStorageDirMap();
if (sdMap != null && !sdMap.isEmpty()) {
/* No storage size specified for storage directories */
final List<String> paths = new ArrayList<>();
for (Parameter p : sdMap) {
if (SizeParameter.getSize(p) <= 0) {
paths.add(p.getName());
}
}
if (!paths.isEmpty()) {
results.add(new MissingStorageDirectorySize(snId,
snp.getStorageDirPaths()));
}
} else {
String rootDirPath = snp.getRootDirPath();
long rootDirSize = snp.getRootDirSize();
if (rootDirSize <= 0) {
results.add(new MissingRootDirectorySize(snId,
rootDirPath));
}
}
/*
* TODO : We will need to enable the MissingRNLogDirectorySize
* check; the RN log dir size is not exposed initially, only the
* underlying support has been added so far.
*/
final ParameterMap rnLogMap = snp.getRNLogDirMap();
if (rnLogMap != null && !rnLogMap.isEmpty()) {
/* No RN log size specified for RN log directories */
final List<String> paths = new ArrayList<>();
for (Parameter p : rnLogMap) {
if (SizeParameter.getSize(p) <= 0) {
paths.add(p.getName());
}
}
if (!paths.isEmpty()) {
/*results.add(new MissingRNLogDirectorySize(snId,
snp.getRNLogDirPaths()));*/
}
}
final ParameterMap adminDirMap = snp.getAdminDirMap();
if (adminDirMap != null && !adminDirMap.isEmpty()) {
/* No admin dir size specified for admin directory */
final List<String> paths = new ArrayList<>();
for (Parameter p : adminDirMap) {
if (SizeParameter.getSize(p) <= 0) {
paths.add(p.getName());
}
}
if (!paths.isEmpty()) {
results.add(new MissingAdminDirectorySize(snId,
snp.getAdminDirPaths()));
}
}
}
}
/**
* Only to be called for AN/RN
*/
private static void checkHelpers(Topology topo,
ResourceId rId,
Parameters params,
Results results) {
String helpers;
String myNodeHostPort = null;
if (rId instanceof RepNodeId) {
RepNodeParams rnp = params.get((RepNodeId)rId);
if (rnp == null) {
return;
}
helpers = rnp.getJEHelperHosts();
myNodeHostPort = rnp.getJENodeHostPort();
} else {
ArbNodeParams anp = params.get((ArbNodeId)rId);
if (anp == null) {
return;
}
helpers = anp.getJEHelperHosts();
myNodeHostPort = anp.getJENodeHostPort();
}
/* Find the HA node host addresses for each member of the shard. */
Set<String> nodeHosts = getTopoHelpers(topo, rId, params);
/*
* Check the number of helpers. Note that a half-run plan
* may not have the proper number of helpers.
*/
StringTokenizer t = new StringTokenizer(helpers, ",");
while (t.hasMoreElements()) {
String unoHelper = t.nextToken().trim();
if (unoHelper.equals(myNodeHostPort)) {
continue;
}
if (!nodeHosts.contains(unoHelper)) {
/* Helper host parameters don't match topology */
Iterator<String> nodeHost = nodeHosts.iterator();
StringBuilder sb = new StringBuilder();
while (nodeHost.hasNext()) {
if (sb.length() > 0) {
sb.append(", ");
}
sb.append(nodeHost.next());
}
String topoHelpersAsString = sb.toString();
results.add(
new HelperParameters(rId,
topoHelpersAsString,
helpers));
}
}
}
/**
* Only to be called for AN/RN to get
* the list of helper hosts defined by the topology.
* @param topo
* @param rId
* @return helper hosts
*/
public static Set<String> getTopoHelpers(Topology topo,
ResourceId rId,
Parameters params) {
RepGroup rg;
HashSet<String> nodeHosts = new HashSet<>();
if (rId instanceof RepNodeId) {
RepNode rn = topo.get((RepNodeId)rId);
rg = topo.getRepGroupMap().get(rn.getRepGroupId());
} else {
ArbNode an = topo.get((ArbNodeId)rId);
rg = topo.getRepGroupMap().get(an.getRepGroupId());
}
/* Find the HA node host addresses for each member of the shard. */
for (RepNode rn: rg.getRepNodes()) {
RepNodeParams rnp = params.get(rn.getResourceId());
if (rnp != null) {
if (!rn.getResourceId().equals(rId)) {
nodeHosts.add(rnp.getJENodeHostPort());
}
}
}
for (ArbNode an: rg.getArbNodes()) {
ArbNodeParams anp = params.get(an.getResourceId());
if (anp != null) {
if (!an.getResourceId().equals(rId)) {
nodeHosts.add(anp.getJENodeHostPort());
}
}
}
return nodeHosts;
}
/*
* Check partition assignments. If a redistribution plan is interrupted
* shards may have non-optimal numbers of partitions.
*/
private static void checkPartitionLayout(Topology topo,
Parameters params,
Results results) {
/*
* Find shards that have no partitions. This flavor of createShardMap
* retains shards that have 0 partitions.
*/
final Map<RepGroupId, ShardDescriptor> shardMap = createShardMap(topo);
for (ShardDescriptor sd : shardMap.values()) {
/* Warn that this shard had no partitions at all. */
if (sd.getNumPartitions() == 0) {
results.add(new NoPartition(sd.getGroupId()));
}
}
if (topo.getPartitionMap().isEmpty()) {
return;
}
final Map<RepGroupId, ShardDescriptor> shards =
createShardMap(topo, params);
/* Record the shards that are over or under target number. */
for (ShardDescriptor sd : shards.values()) {
if (sd.getOverUnder() != 0) {
results.add(new NonOptimalNumPartitions
(sd.getGroupId(), sd.getNumPartitions(),
sd.getTargetNumPartitions(),
sd.getTargetNumPartitions()));
}
// TODO - check for target number 0? Maybe use that to indicate
// the directory is too small for the store to be balanced?
/* Check size discrepancy */
if (sd.isSizeImbalanced()) {
results.add(new StorageDirectorySizeImbalance(sd.getGroupId(),
sd.getMinDirSize(),
sd.getMaxDirSize()));
}
}
}
private static void checkTopoHelperRules(Topology topo,
Results results,
Parameters params) {
for (RepNodeId rnId : topo.getRepNodeIds()) {
checkHelpers(topo, rnId, params, results);
}
for (ArbNodeId anId : topo.getArbNodeIds()) {
checkHelpers(topo, anId, params, results);
}
}
private static void checkArbiterRules(Topology topo, Results results,
Parameters params) {
if (!ArbiterTopoUtils.useArbiters(topo)) {
/*
* Not using Arbiters so check if there are any.
*/
findArbitersToBeDeleted(topo, results);
return;
}
/*
* Find the best primary DC that allows arbiters in the store
*/
DatacenterId bestArbDcId =
ArbiterTopoUtils.getBestArbiterDC(topo, params);
/*
* This flag is used to determine if AN distribution checks
* should be performed. If there are other violations that
* once corrected would change the AN distribution, then the
* AN distribution check is not performed.
*/
boolean checkDistribution = true;
/*
* Within the primary arbiter hosting datacenter, the arbiters will be
* hosted in those SNs that allow arbiters.
*/
HashMap<StorageNodeId, Integer> snANCountMap = new HashMap<>();
for (ArbNodeId arbNodeId : topo.getArbNodeIds()) {
ArbNode arbNode = topo.get(arbNodeId);
StorageNodeId snId = arbNode.getStorageNodeId();
/*
* Check if the arbiter is hosted on an SN that does not allow
* arbiters.
*/
if (!params.get(snId).getAllowArbiters()) {
checkDistribution = false;
results.add(new ANNotAllowedOnSN(arbNodeId, snId, bestArbDcId));
}
Integer anCount = snANCountMap.get(snId);
if (anCount == null) {
anCount = Integer.valueOf(1);
} else {
anCount = Integer.valueOf(anCount + 1);
}
snANCountMap.put(snId, anCount);
}
/*
* Arbiters will be hosted in a single primary DC that allows
* ANs. Add violations for ANs hosted outside of the chosen
* Arbiter DC.
*/
for (ArbNodeId arbNodeId : topo.getArbNodeIds()) {
ArbNode arbNode = topo.get(arbNodeId);
StorageNodeId snId = arbNode.getStorageNodeId();
DatacenterId dcId = topo.get(snId).getDatacenterId();
if (dcId.equals(bestArbDcId)) {
continue;
}
/*
* Report all the arbiters that need to be transferred.
*/
results.add(new ANWrongDC(dcId, bestArbDcId, arbNodeId));
checkDistribution = false;
}
/*
* Check and add a violation if a shard does not
* have an AN.
*/
Set<RepGroupId> shardsWithNoArbs = topo.getRepGroupIds();
for (ArbNodeId arbNodeId : topo.getArbNodeIds()) {
shardsWithNoArbs.remove(topo.get(arbNodeId).getRepGroupId());
}
for (RepGroupId rgId : shardsWithNoArbs) {
/*
* Report the repgroups with no arbiters.
*/
results.add(new InsufficientANs(rgId, bestArbDcId));
checkDistribution = false;
}
/* All RNs and AN of a single shard must be on different SNs */
for (RepGroupId rgId : topo.getRepGroupIds()) {
/* Organize RNIds by SNId */
Map<StorageNodeId, Set<RepNodeId>> rnIdBySNId = new HashMap<>();
for (RepNode rn : topo.get(rgId).getRepNodes()) {
StorageNodeId snId = rn.getStorageNodeId();
Set<RepNodeId> rnIds = rnIdBySNId.get(snId);
if (rnIds == null) {
rnIds = new HashSet<>();
rnIdBySNId.put(rn.getStorageNodeId(), rnIds);
}
rnIds.add(rn.getResourceId());
}
for (ArbNodeId anId : topo.getArbNodeIds()) {
ArbNode an = topo.get(anId);
Set<RepNodeId> rns = rnIdBySNId.get(an.getStorageNodeId());
if (rns != null) {
ArrayList<RepNodeId> rnsInGrp = null;
for (RepNodeId rnId : rns) {
RepNode rn = topo.get(rnId);
if (rn.getRepGroupId().equals(an.getRepGroupId())) {
if (rnsInGrp == null) {
rnsInGrp = new ArrayList<>();
}
rnsInGrp.add(rnId);
}
}
if (rnsInGrp != null) {
results.add(new ANProximity(an.getStorageNodeId(),
rgId,
rnsInGrp,
anId));
checkDistribution = false;
}
}
}
}
/* Check for more than one AN in a RepGroup. */
for (final RepGroup rg : topo.getRepGroupMap().getAll()) {
boolean found = false;
for (final ArbNode an : rg.getArbNodes()) {
if (!found) {
found = true;
} else {
results.add(new ExcessANs(rg.getResourceId(),
an.getResourceId()));
checkDistribution = false;
}
}
}
/* Make checks for AN distribution */
if (!checkDistribution) {
/*
* Skip the AN distribution check if there are violations
* that if corrected would change the AN distribution.
*/
return;
}
DatacenterId anDCId = null;
for (StorageNodeId snId : snANCountMap.keySet()) {
if (anDCId == null) {
anDCId = topo.get(snId).getDatacenterId();
} else if (!topo.get(snId).getDatacenterId().equals(anDCId)) {
return;
}
}
int maxNumAN =
ArbiterTopoUtils.computeZoneMaxANsPerSN(topo, params, anDCId);
if (maxNumAN == 0) {
/* No usable SNs */
return;
}
for (Entry<StorageNodeId, Integer> entry : snANCountMap.entrySet()) {
if (entry.getValue() > maxNumAN) {
results.add(new UnevenANDistribution(entry.getKey(),
entry.getValue(),
maxNumAN));
}
}
}
/**
* Check all the repgroups for arbiters that should be deleted because the
* topology does not use arbiters (for example, when RF > 2).
*/
private static void findArbitersToBeDeleted(Topology topo,
Results results) {
for (ArbNodeId arbNodeId : topo.getArbNodeIds()) {
ArbNode arbNode = topo.get(arbNodeId);
RepGroupId rgId = arbNode.getRepGroupId();
results.add(new ExcessANs(rgId, arbNodeId));
}
}
private static void checkEmptyZone(Topology topo, Results results) {
/*
* Set containing all the zones in the topology.
*/
Set<Datacenter> zoneSet = new HashSet<>();
zoneSet.addAll(topo.getDatacenterMap().getAll());
/*
* Iterate over all storage nodes. Remove from the zoneSet the zones
* which have at least one SN.
*/
for (StorageNode sn : topo.getStorageNodes(null)) {
zoneSet.remove(topo.get(sn.getDatacenterId()));
}
/*
* Report the Empty Zones.
*/
for (Datacenter dc : zoneSet) {
results.add(new EmptyZone(dc.getResourceId()));
}
}
private static void checkRepNodeTypes(Topology topo,
Parameters params,
Results results) {
for (final RepGroup rg : topo.getRepGroupMap().getAll()) {
for (final RepNode rn : rg.getRepNodes()) {
final RepNodeId rnId = rn.getResourceId();
final RepNodeParams rnp = params.get(rnId);
/*
* We expect each RN to have parameters, but they might not
* temporarily, due to a race condition. There can only be an
* incorrect node type if there are parameters, so there is
* nothing more to check if the parameters are missing.
*/
if (rnp == null) {
continue;
}
final NodeType nodeType = rnp.getNodeType();
final Datacenter zone =
topo.getDatacenter(rn.getStorageNodeId());
final NodeType defaultNodeType =
Datacenter.ServerUtil.getDefaultRepNodeType(zone);
if (nodeType != defaultNodeType) {
results.add(new WrongNodeType(rnId,
nodeType,
zone.getResourceId(),
zone.getDatacenterType()));
}
}
}
}
/**
* Checks that each zone has at least RF number of Admins. Also checks
* if the type of the deployed Admins matches the type of the zone.
*/
private static void checkAdmins(Topology topo,
Parameters params,
Results results) {
/* Map of zone:# of Admins in that zone */
final Map<DatacenterId, Integer> adminMap = new HashMap<>();
for (AdminParams ap : params.getAdminParams()) {
final StorageNode sn =
topo.getStorageNodeMap().get(ap.getStorageNodeId());
/* Admin parameters pointing to a non-existent SN */
if (sn == null) {
results.add(new BadAdmin(ap.getAdminId(),
ap.getStorageNodeId()));
continue;
}
final Datacenter dc = topo.get(sn.getDatacenterId());
Integer count = adminMap.get(dc.getResourceId());
if (count == null) {
count = 0;
}
count++;
adminMap.put(dc.getResourceId(), count);
final AdminType adminType = ap.getType();
final DatacenterType zoneType = dc.getDatacenterType();
/* Check that the Admin type matches the zone type */
if (!(adminType.isPrimary() && zoneType.isPrimary() ||
adminType.isSecondary() && zoneType.isSecondary())) {
/* Admin - zone type mismatch */
results.add(new WrongAdminType(ap.getAdminId(), ap.getType(),
dc.getResourceId(),
dc.getDatacenterType()));
}
}
/* Check the number of Admins in each zone */
for (Datacenter dc : topo.getDatacenterMap().getAll()) {
final int zoneRF = dc.getRepFactor();
final Integer numAdmins = adminMap.get(dc.getResourceId());
final int difference = (numAdmins == null) ? -zoneRF :
numAdmins - zoneRF;
/* Not enough - violation */
if (difference < 0) {
results.add(new InsufficientAdmins(dc.getResourceId(),
zoneRF, -difference));
continue;
}
/* Too many - note */
if (difference > 0) {
results.add(new ExcessAdmins(dc.getResourceId(), zoneRF,
difference));
continue;
}
/* numAdmins == zoneRF - just right */
}
}
private static void findMultipleRNsInRootDir(Topology topo,
Parameters params,
Results results) {
/*
* Highlight SNs which have multiple RNs in the root directory. This
* validation step is only possible when validate has been called on a
* deployed topology. Before a deployment, the RepNodeParams mountPoint
* field is not initialized, and the RepNodeParams themselves may not
* exist.
*/
Map<StorageNodeId, List<RepNodeId>> rnsOnRoot = new HashMap<>();
for (RepNode rn: topo.getSortedRepNodes()) {
StorageNodeId snId = rn.getStorageNodeId();
RepNodeId rnId = rn.getResourceId();
RepNodeParams rnp = params.get(rnId);
if (rnp == null) {
continue;
}
if (rnp.getStorageDirectoryPath() == null) {
List<RepNodeId> onRoot = rnsOnRoot.get(snId);
if (onRoot == null) {
onRoot = new ArrayList<>();
rnsOnRoot.put(snId, onRoot);
}
onRoot.add(rnId);
}
}
for (Map.Entry<StorageNodeId, List<RepNodeId>> snRoot :
rnsOnRoot.entrySet()) {
if (snRoot.getValue().size() == 1) {
/* 1 RN on the root, no issue with that. */
continue;
}
StorageNodeId snId = snRoot.getKey();
results.add(new MultipleRNsInRoot(snId,
snRoot.getValue(),
params.get(snId).getRootDirPath()));
}
}
/*
* For SNs that have a memory_mb param value, check that the total heap of
* the RNs hosted on that SN does not exceed the SN memory.
*/
private static void checkMemoryUsageOnSN(Map<StorageNodeId, Set<RepNodeId>> rnsPerSN,
Parameters params,
Results results) {
for (Map.Entry<StorageNodeId, Set<RepNodeId>> entry :
rnsPerSN.entrySet()) {
StorageNodeId snId = entry.getKey();
StorageNodeParams snp = params.get(snId);
if (snp.getMemoryMB() == 0) {
continue;
}
long totalRNHeapMB = 0;
Set<RepNodeId> rnIds = entry.getValue();
for (RepNodeId rnId : rnIds) {
RepNodeParams rnp = params.get(rnId);
if (rnp == null) {
continue;
}
totalRNHeapMB += params.get(rnId).getMaxHeapMB();
}
if (totalRNHeapMB > snp.getMemoryMB()) {
StringBuilder sb = new StringBuilder();
for (RepNodeId rnId : rnIds) {
sb.append(rnId).append(" ").
append(params.get(rnId).getMaxHeapMB()).append("MB,");
}
results.add(new RNHeapExceedsSNMemory(snId, snp.getMemoryMB(),
rnIds, totalRNHeapMB,
sb.toString()));
}
}
}
/**
* Check that a transition from source to candidate topology is possible.
*
* -Once deployed, the number of partitions in a store cannot change.
* -A shard cannot be concurrently involved in a partition migration AND
* have an RN that is being moved from one SN to another - TODO: move to
* task generator class
*
* -For R2, we do not support contraction, so there cannot be
* -shards that are in the source, but are not in the candidate.
* -RNs that are in the source, but are not in the candidate.
*/
public static void validateTransition(Topology source,
TopologyCandidate candidate,
Parameters params) {
Topology target = candidate.getTopology();
/*
* Make sure that all the SNs and storage directories in the candidate
* still exist. It's possible that the user has removed them.
*/
for (StorageNodeId snId : target.getStorageNodeIds()) {
if (params.get(snId) == null) {
throw new IllegalCommandException
("Topology candidate " + candidate.getName() + " uses " +
snId + " but " + snId + " has been removed.");
}
}
checkAllStorageDirsExist(candidate, params);
/*
* Make sure that all RN log directories in the candidate
* still exist. It is possible that the user has removed them.
* Note that changing or removing an RN log directory is not
* currently supported, but it is a TODO.
*/
checkAllRNLogDirsExist(candidate, params);
/*
* TODO : Need to check for admindir existence before transition.
* The admin dir would then need to be part of the topology candidate.
*/
if (source.getRepGroupMap().isEmpty()) {
/*
* Right now, if there are no shards in the source, there is
* nothing else to check in the transition.
*/
return;
}
/*
* If the store has already been deployed, check that the number of
* partitions has not changed.
*/
Collection<PartitionId> sourcePartitionIds =
source.getPartitionMap().getAllIds();
Collection<PartitionId> targetPartitionIds =
target.getPartitionMap().getAllIds();
/* The source and target must have the same number of partitions. */
if ((!sourcePartitionIds.isEmpty()) &&
(sourcePartitionIds.size() != targetPartitionIds.size())) {
throw new IllegalCommandException
("The current topology has been initialized and has " +
sourcePartitionIds.size() +
" partitions while the target (" + candidate.getName() +
")has " + targetPartitionIds.size() +
" partitions. The number of partitions in a store cannot be"+
" changed");
}
/* The source and target must have the same set of partitions. */
Set<PartitionId> mustExistCopy = new HashSet<>(sourcePartitionIds);
mustExistCopy.removeAll(targetPartitionIds);
if (!mustExistCopy.isEmpty()) {
throw new IllegalCommandException
("These partitions are missing from " +
candidate.getName() + ":" + mustExistCopy);
}
/*
* Of the two operations, adding RNs and removing RNs, only one
* is supported at a time.
*/
Set<RepNodeId> sourceRNsCopy = new HashSet<>(source.getRepNodeIds());
Set<RepNodeId> targetRNsCopy = target.getRepNodeIds();
sourceRNsCopy.removeAll(target.getRepNodeIds());
targetRNsCopy.removeAll(source.getRepNodeIds());
if (!sourceRNsCopy.isEmpty() && !targetRNsCopy.isEmpty()) {
throw new IllegalCommandException
("Adding Rep Nodes and Removing Rep Nodes are not supported " +
"at the same time in the " + candidate.getName() + ". " +
"Adding Rep Nodes: " + targetRNsCopy + ", " +
"removing Rep Nodes: " + sourceRNsCopy);
}
}
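/*
* Illustrative usage sketch: check a candidate before attempting to deploy
* it (currentTopo, candidate and params are assumed to be in scope). An
* IllegalCommandException indicates that the transition is not allowed.
*
*   try {
*       Rules.validateTransition(currentTopo, candidate, params);
*   } catch (IllegalCommandException ice) {
*       // the candidate cannot be deployed from the current topology
*   }
*/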
public static void checkAllStorageDirsExist(TopologyCandidate candidate,
Parameters params) {
for (Map.Entry<RepNodeId, String> entry :
candidate.getDirectoryAssignments().entrySet()) {
final RepNodeId rnId = entry.getKey();
final String path = entry.getValue();
final StorageNodeId snId =
candidate.getTopology().get(rnId).getStorageNodeId();
if ((path != null) &&
!directoryExists(path,
params.get(snId).getStorageDirPaths())) {
throw new IllegalCommandException
("Topology candidate " + candidate.getName() +
" uses storage directory " + snId + ", " + path +
" but it has been removed.");
}
}
}
public static void checkAllRNLogDirsExist(TopologyCandidate candidate,
Parameters params) {
for (Map.Entry<RepNodeId, String> entry :
candidate.getRNLogDirectoryAssignments().entrySet()) {
final RepNodeId rnId = entry.getKey();
final String path = entry.getValue();
final StorageNodeId snId =
candidate.getTopology().get(rnId).getStorageNodeId();
if ((path != null) &&
!directoryExists(path,
params.get(snId).getRNLogDirPaths())) {
throw new IllegalCommandException
("Topology candidate " + candidate.getName() +
" uses RN log directory " + snId + ", " + path +
" but it has been removed.");
}
}
}
private static boolean directoryExists(String storageDir,
List<String> sdList) {
for (String sd : sdList) {
if (sd.equals(storageDir)) {
return true;
}
}
return false;
}
/**
* Ensure that the replication factor is in the valid range.
*/
public static void validateReplicationFactor(final int repFactor) {
if (repFactor < 0 || repFactor > MAX_REPLICATION_FACTOR) {
throw new IllegalArgumentException
("Illegal replication factor: " + repFactor +
", valid range is 0 to " + MAX_REPLICATION_FACTOR);
}
}
/**
* Ensure that Arbiters can only be specified for primary
* datacenters.
*/
public static void validateArbiter(final boolean allowArbiters,
final DatacenterType dt) {
if (allowArbiters && !dt.equals(DatacenterType.PRIMARY)) {
throw new IllegalArgumentException("Allow Arbiters only allowed "+
"on primary zones.");
}
}
public static void validateMasterAffinity(final boolean masterAffinity,
final DatacenterType dt) {
if (masterAffinity && !dt.equals(DatacenterType.PRIMARY)) {
throw new IllegalArgumentException("Master affinity only allowed " +
"on primary zones.");
}
}
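/*
* Example (illustrative): validateArbiter(true, DatacenterType.SECONDARY)
* and validateMasterAffinity(true, DatacenterType.SECONDARY) both throw
* IllegalArgumentException, since arbiters and master affinity may only be
* enabled on primary zones.
*/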
/**
* Check if "rn" can be placed on this SN. Account for constraints that can
* be considered in the scope of a single SN, like capacity and RN
* consanguinity.
*/
static boolean checkRNPlacement(StorageNodeParams snp,
Set<RepNodeId> assignedRNs,
int proposedRG,
boolean ignoreCapacity,
Logger logger) {
if (!ignoreCapacity &&
(assignedRNs.size() >= snp.getCapacity())) {
logger.log(Level.FINEST, "{0} capacity={1}, num hosted RNS={2}",
new Object[]{snp.getStorageNodeId(), snp.getCapacity(),
assignedRNs.size()});
return false;
}
for (RepNodeId r : assignedRNs) {
if (r.getGroupId() == proposedRG) {
logger.log(Level.FINEST,
"{0} already has RN {1} from same shard as {2}",
new Object[]{snp.getStorageNodeId(), r, proposedRG});
return false;
}
}
return true;
}
/**
* Return the relative complement of A and B. That is, return all the items
* in A that are not also in B.
*/
private static <T> Set<T> relComplement(Set<T> a,
Set<T> b) {
Set<T> copyA = new HashSet<>(a);
copyA.removeAll(b);
return copyA;
}
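/*
* Example (illustrative): relComplement({rg1, rg2, rg3}, {rg2}) returns
* {rg1, rg3}. The result is built from a copy, so neither input set is
* modified.
*/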
/**
* Checks whether all or no storage directories have sizes. Returns an empty
* set if all storage directories have sizes or none do. Otherwise, returns the
* set of SNs which have storage directories that are missing sizes. The union
* set of SNs in the specified topology as well as the pool are checked.
*
* If an SN does not have any explicit storage directories, or the capacity
* is greater than the number of explicit storage directories, the size of
* the root directory is checked.
*
* @return the set of SNs that are missing storage directory sizes
*/
static Set<StorageNodeId> validateStorageDirSizes(Topology topo,
StorageNodePool pool,
Parameters params) {
final Set<StorageNodeId> allSNs = new HashSet<>(pool.getList());
allSNs.addAll(topo.getStorageNodeIds());
final Set<StorageNodeId> noSizes = new HashSet<>();
boolean sizeSpecified = false;
for (StorageNodeId snId : allSNs) {
final StorageNodeParams snp = params.get(snId);
final ParameterMap sdMap = snp.getStorageDirMap();
// TODO can sdMap be null?
final int numStorageDirs = (sdMap == null) ? 0 : sdMap.size();
/*
* If the capacity is > the number of defined storage directories
* check whether the root size was specified.
*/
if (snp.getCapacity() > numStorageDirs) {
if (snp.getRootDirSize() > 0) {
sizeSpecified = true;
} else {
noSizes.add(snId);
}
}
if (sdMap != null) {
for (Parameter p : sdMap) {
if (SizeParameter.getSize(p) > 0) {
sizeSpecified = true;
} else {
noSizes.add(snId);
}
}
}
}
if (!sizeSpecified) {
noSizes.clear();
}
return noSizes;
}
/**
* Checks whether all or no RN log directories have sizes. Returns an empty
* set if all RN log directories have sizes or none do. Otherwise, returns the
* set of SNs which have RN log directories that are missing sizes. The union
* set of SNs in the specified topology as well as the pool are checked.
*
* This code is currently unused; it will be used by the topology
* builder when rnlogdirsize is exposed.
*
* @return the set of SNs that are missing RN log directory sizes
*/
static Set<StorageNodeId> validateRNLogDirSizes(Topology topo,
StorageNodePool pool,
Parameters params) {
final Set<StorageNodeId> allSNs = new HashSet<>(pool.getList());
allSNs.addAll(topo.getStorageNodeIds());
final Set<StorageNodeId> noSizes = new HashSet<>();
boolean sizeSpecified = false;
for (StorageNodeId snId : allSNs) {
final StorageNodeParams snp = params.get(snId);
final ParameterMap rnlMap = snp.getRNLogDirMap();
if (rnlMap != null) {
for (Parameter p : rnlMap) {
if (SizeParameter.getSize(p) > 0) {
sizeSpecified = true;
} else {
noSizes.add(snId);
}
}
}
}
if (!sizeSpecified) {
noSizes.clear();
}
return noSizes;
}
/**
* Creates a map of shard descriptors based on the specified topology
* candidate. Shard minimum directory sizes will be calculated for each
* shard based on information from the parameters and candidate. If the
* candidate's topology is uninitialized an empty map is returned.
*/
static SortedMap<RepGroupId, ShardDescriptor>
createShardMap(TopologyCandidate candidate,
Parameters params) {
final Topology topo = candidate.getTopology();
final int numPartitions = topo.getPartitionMap().size();
if (topo.getRepGroupMap().isEmpty() || (numPartitions == 0)) {
return new TreeMap<>();
// For when we can use Java 8
//return Collections.emptySortedMap();
}
final SortedMap<RepGroupId, ShardDescriptor> shards =
createShardMap(topo);
/* Collect directory size information for the nodes in each shard */
collectDirSizes(shards, candidate, topo, params);
return shards;
}
/**
* Creates a map of shard descriptors based on the specified topology
* candidate. Shard minimum directory sizes and target partition
* numbers will be calculated for each shard from the candidate's
* storage directory information. Shards specified in
* removedRgIds will be configured for removal.
*/
static SortedMap<RepGroupId, ShardDescriptor>
createShardMap(TopologyCandidate candidate,
Parameters params,
List<RepGroupId> removedRgIds,
int numPartitions) {
if (numPartitions <= 0) {
throw new IllegalArgumentException("Invalid number of partitions: "+
numPartitions);
}
final Topology topo = candidate.getTopology();
if (topo.getRepGroupMap().isEmpty()) {
throw new IllegalStateException("Attempt to create shard map from "+
"uninitialized topology");
}
final SortedMap<RepGroupId, ShardDescriptor> shards =
createShardMap(topo);
/* Collect directory size information for the nodes in each shard */
collectDirSizes(shards, candidate, topo, params);
/* Mark shards to be removed */
for (RepGroupId rgId : removedRgIds) {
shards.get(rgId).targetForRemoval();
}
return setPartitionTargets(topo, shards, numPartitions);
}
/**
* Creates a map of shard descriptors based on the specified topology.
* Shard minimum directory sizes and target partition numbers will be
* calculated for each shard based on information from the parameters.
* If the topology is uninitialized an empty map is returned.
*/
private static SortedMap<RepGroupId, ShardDescriptor>
createShardMap(Topology topo,
Parameters params) {
final int numPartitions = topo.getPartitionMap().size();
if (topo.getRepGroupMap().isEmpty() || (numPartitions == 0)) {
return new TreeMap<>();
// For when we can use Java 8
//return Collections.emptySortedMap();
}
final SortedMap<RepGroupId, ShardDescriptor> shards =
createShardMap(topo);
/* Collect directory size information for the nodes in each shard */
collectDirSizes(shards, null, topo, params);
return setPartitionTargets(topo, shards, numPartitions);
}
/**
* Creates a map of shard descriptors based on the specified topology. The
* shard descriptors are only initialized with partition counts if
* available.
*/
private static SortedMap<RepGroupId, ShardDescriptor>
createShardMap(Topology topo) {
/* Create descriptor for each shard */
final SortedMap<RepGroupId, ShardDescriptor> shards = new TreeMap<>();
for (RepGroupId rgId : topo.getRepGroupIds()) {
shards.put(rgId, new ShardDescriptor(rgId));
}
/* Populate the assigned partitions */
for (Partition p: topo.getPartitionMap().getAll()) {
shards.get(p.getRepGroupId()).addPartition(p.getResourceId());
}
return shards;
}
/**
* Collects directory size information for the nodes in each shard. If
* candidate is not null the candidate assignments are checked first for
* a directory path otherwise the path is obtained from the rep node
* parameters. The directory size is obtained from the storage node
* parameters. (see TopologyCandidate.getStorageDir)
*/
private static void collectDirSizes(
SortedMap<RepGroupId, ShardDescriptor> shards,
TopologyCandidate candidate,
Topology topo,
Parameters params) {
for (RepNode rn : topo.getSortedRepNodes()) {
final long size =
TopologyCandidate.getStorageDir(rn.getResourceId(),
candidate,
topo, params).getSize();
shards.get(rn.getRepGroupId()).setDirSize(size);
}
}
/**
* Sets the target number of partitions in the shard descriptors. The
* modified map of descriptors is returned.
*
* Note that the algorithm below assumes that the directory sizes are
* much larger than the number of partitions, at least an order of
* magnitude or two. In practice the difference will be many orders of
* magnitude, so it is not an issue. The problem may arise during testing
* where someone uses a small number for a directory size; in that case the
* partition distribution could be a little off due to rounding errors.
*/
private static SortedMap<RepGroupId, ShardDescriptor>
setPartitionTargets(Topology topo,
SortedMap<RepGroupId, ShardDescriptor> shards,
int numPartitions) {
/*
* Find the total size available for the
* store as well as the smallest shard.
*/
long totalDirSize = 0L;
long smallestDirSize = Long.MAX_VALUE;
for (ShardDescriptor sd : shards.values()) {
if (sd.isTargetForRemoval()) {
continue;
}
long minDirSize = sd.getMinDirSize();
totalDirSize += minDirSize;
if (minDirSize < smallestDirSize) {
smallestDirSize = minDirSize;
}
}
/* This should already be checked, but check again */
if ((totalDirSize > 0) && (smallestDirSize <= 0)) {
throw new IllegalCommandException("not all storage directories " +
"have sizes");
}
/* The minimum # of partitions per shard */
int minNumPartitions;
/* The amount of directory space needed for one partition */
long bytesPerPartition = 0L;
if (totalDirSize == 0) {
final int totalShards = topo.getRepGroupMap().size();
/*
* We don't know anything, so evenly distribute partitions across
* shards
*/
minNumPartitions = numPartitions / totalShards;
} else {
bytesPerPartition = totalDirSize / numPartitions;
minNumPartitions = (bytesPerPartition > 0) ?
(int)(smallestDirSize / bytesPerPartition) : 0;
}
/* If calculations end up at 0, set to at least 1 partition per shard */
if (minNumPartitions == 0) {
minNumPartitions = 1;
}
int totalAssigned = 0;
for (ShardDescriptor sd : shards.values()) {
totalAssigned +=
sd.setTargetNumPartitions(minNumPartitions,
bytesPerPartition,
numPartitions - totalAssigned);
// TODO - how about checking for 0 meaning the directory was too small
if (totalAssigned >= numPartitions) {
break;
}
}
/* Assign remaining partitions by spreading them across shards */
while (totalAssigned < numPartitions) {
for (ShardDescriptor sd : shards.values()) {
if (sd.isTargetForRemoval()) {
continue;
}
sd.addTargetNumPartition();
totalAssigned++;
if (totalAssigned >= numPartitions) {
break;
}
}
}
if (totalAssigned != numPartitions) {
throw new IllegalStateException("Internal error calculating " +
"partition targets");
}
return shards;
}
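/*
* Worked example (illustrative, with made-up sizes): for two shards whose
* minimum directory sizes are 200 GB and 100 GB, totalDirSize is 300 GB and
* smallestDirSize is 100 GB. With numPartitions = 90, bytesPerPartition is
* roughly 300 GB / 90, so the smaller shard is targeted for about
* 100 GB / (300 GB / 90) = 30 partitions and the larger shard for about 60.
* Any remainder left over from integer rounding is then spread one partition
* at a time across the shards that are not targeted for removal.
*/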
/**
* Shard descriptor. When inserted into a TreeSet the descriptors will sort
* in order of the number of partitions, least to most.
*/
static class ShardDescriptor implements Comparable<ShardDescriptor> {
private final RepGroupId rgId;
private final List<PartitionId> partitions = new ArrayList<>();
/* Set to -1 if the shard is being removed */
private int targetNumPartitions = 0;
/* Size of the smallest directory in this shard */
private long minDirSize = Long.MAX_VALUE;
/* Size of the largest directory in this shard */
private long maxDirSize = 0L;
/* True if the shard is directory size imbalanced. */
private boolean sizeImbalance = false;
/* Constructor. The partition list will be empty. */
private ShardDescriptor(RepGroupId rgId) {
this.rgId = rgId;
}
RepGroupId getGroupId() {
return rgId;
}
private int getNumPartitions() {
return partitions.size();
}
int getTargetNumPartitions() {
return isTargetForRemoval() ? 0 : targetNumPartitions;
}
PartitionId removePartition() {
return partitions.isEmpty() ? null : partitions.remove(0);
}
void addPartition(PartitionId partitionId) {
partitions.add(partitionId);
}
/**
* Returns the number of partitions over or under the target number
* of partitions. If over the value will be positive. If under the
* value will be negative. If the number of partitions equals the
* target number zero is returned.
*
* @return the number of partitions over or under the target number
*/
int getOverUnder() {
return partitions.size() - getTargetNumPartitions();
}
/*
* If targeted for removal we set the target partitions to -1 and set
* the directory size to 0 so that it is not counted.
*/
private void targetForRemoval() {
minDirSize = 0L;
maxDirSize = 0L;
targetNumPartitions = -1;
}
/**
* Returns true if targetForRemoval() has been called.
*/
boolean isTargetForRemoval() {
return targetNumPartitions < 0;
}
/**
* Sets the min and max directory size for this shard.
*/
private void setDirSize(long size) {
assert !isTargetForRemoval();
if (size < minDirSize) {
minDirSize = size;
}
if (size > maxDirSize) {
maxDirSize = size;
}
}
/**
* Returns the minimum directory size for this shard. If the minimum
* has not been initialized 0 is returned.
*
* @return the minimum directory size for this shard
*/
long getMinDirSize() {
return (minDirSize < Long.MAX_VALUE) ? minDirSize : 0L;
}
/**
* Returns the maximum directory size for this shard. If the maximum
* has not been initialized 0 is returned.
*
* @return the maximum directory size for this shard
*/
private long getMaxDirSize() {
return maxDirSize;
}
/**
* Returns true if the shard is directory size imbalanced.
*/
private boolean isSizeImbalanced() {
return sizeImbalance;
}
/**
* Increments the target number of partitions by one. The resulting
* value is returned.
*
* @return the new target number of partitions.
*/
private int addTargetNumPartition() {
assert !isTargetForRemoval();
targetNumPartitions++;
return targetNumPartitions;
}
/**
* Sets the target number of partitions based on the input parameters.
* The target is based on the directory size divided by
* bytesPerPartition. If the directory size is 0, or the calculation
* results in a value less than 1, the target is set to
* minNumPartitions. If the shard is marked for removal the target
* is set to 0. The target value is returned.
*
* @param minNumPartitions min partitions
* @param bytesPerPartition the target size for each partition
* @param maxPartitions the maximum partitions that can be assigned
* @return the target number of partitions
*/
private int setTargetNumPartitions(int minNumPartitions,
long bytesPerPartition,
int maxPartitions) {
assert minNumPartitions > 0;
if (isTargetForRemoval()) {
return 0;
}
targetNumPartitions = minNumPartitions;
if ((minDirSize > 0L) && (bytesPerPartition > 0L)) {
final int np = (int)(minDirSize / bytesPerPartition);
if (np > minNumPartitions) {
targetNumPartitions = np;
}
/*
* The shard is imbalanced if it could host more partitions
* if all nodes were as large as the max.
*/
final long sizeDiff = maxDirSize - minDirSize;
sizeImbalance = (int)(sizeDiff / bytesPerPartition) > 0;
}
if (targetNumPartitions > maxPartitions) {
targetNumPartitions = maxPartitions;
}
return targetNumPartitions;
}
/* Sort by who is over the most */
@Override
public int compareTo(ShardDescriptor gd) {
if (this.equals(gd)) {
return 0;
}
return (getOverUnder() >= gd.getOverUnder()) ? 1 : -1;
}
@Override
public String toString() {
return toString(new StringBuilder(), "").toString();
}
StringBuilder toString(StringBuilder sb, String tab) {
sb.append(tab).append("ShardDescriptor[").append(rgId);
sb.append(" partitions=").append(partitions.size());
sb.append(" targetPartitions=").append(targetNumPartitions);
sb.append(" minDirSize=").append(minDirSize);
sb.append(" sizeImbalance=").append(sizeImbalance);
sb.append("]");
return sb;
}
}
/**
* Create a profile which contains over and under utilized SNs, capacity
* problems and warnings, and the excessive capacity count for a topology.
*
* This does not account for datacenter topology (one datacenter may gate
* the others in terms of how many shards can be supported, so some RN
* slots in a given datacenter may intentionally be unused), so this method
* would have to be updated when there is datacenter support.
*/
static CapacityProfile getCapacityProfile(Topology topo,
Parameters params) {
Map<StorageNodeId, Integer> rnCount = new HashMap<>();
for (StorageNodeId snId : topo.getStorageNodeIds()) {
rnCount.put(snId, 0);
}
for (RepNode rn: topo.getSortedRepNodes()) {
StorageNodeId snId = rn.getStorageNodeId();
int currentCount = rnCount.get(snId);
rnCount.put(snId, ++currentCount);
}
CapacityProfile profile = new CapacityProfile();
for (StorageNodeId snId : topo.getStorageNodeIds()) {
final int count = rnCount.get(snId);
final StorageNodeParams snParams = params.get(snId);
/*
* snParams may be null if the Storage Node does not exist in the
* current store; it might have been removed by the user earlier.
*/
if (snParams == null) {
continue;
}
final int capacity = snParams.getCapacity();
profile.noteUtilization(snId, capacity, count);
}
return profile;
}
/**
* Packages information about the capacity characteristics of a topology.
* List SNs that are over or under utilized, and capacity violations and
* warnings.
*
* Also includes an excessCapacity count, which is a measure of all unused
* slots. Note that this count can be < 0 if the SNs are over utilized.
* Likewise, a 0 excessCapacity doesn't mean that the topology is totally
* in compliance. It could be a result of a combination of over and under
* capacity SNs.
*/
static class CapacityProfile {
private int excessCapacity;
private final List<StorageNodeId> overUtilizedSNs;
private final List<StorageNodeId> underUtilizedSNs;
private final List<RulesProblem> problems;
private final List<RulesProblem> warnings;
CapacityProfile() {
overUtilizedSNs = new ArrayList<>();
underUtilizedSNs = new ArrayList<>();
problems = new ArrayList<>();
warnings = new ArrayList<>();
}
public List<RulesProblem> getWarnings() {
return warnings;
}
List<RulesProblem> getProblems() {
return problems;
}
/**
* If the capacity and RN count don't match, record the discrepancy.
*/
private void noteUtilization(StorageNodeId snId,
int snCapacity,
int rnCount) {
int capacityDelta = snCapacity - rnCount;
excessCapacity += capacityDelta;
if (capacityDelta > 0) {
UnderCapacity under =
new UnderCapacity(snId, rnCount, snCapacity);
underUtilizedSNs.add(snId);
warnings.add(under);
} else if (capacityDelta < 0) {
OverCapacity over = new OverCapacity(snId, rnCount, snCapacity);
overUtilizedSNs.add(snId);
problems.add(over);
}
}
int getExcessCapacity() {
return excessCapacity;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
sb.append("excess=").append(excessCapacity).append("\n");
sb.append("problems:").append(problems).append("\n");
sb.append("warnings:").append(warnings).append("\n");
return sb.toString();
}
}
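/*
* Example (illustrative): an SN with capacity=3 hosting 2 RNs contributes
* +1 to excessCapacity and an UnderCapacity warning, while an SN with
* capacity=1 hosting 2 RNs contributes -1 and an OverCapacity violation.
* Together they yield excessCapacity == 0 even though neither SN complies,
* which is why excessCapacity alone is not a compliance check.
*/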
/**
* Return the total sum capacity of all the SNs in the specified pool.
* Do not consider whether there are any RNs assigned to those SNs yet.
*/
static int calculateMaximumCapacity(StorageNodePool snPool,
Parameters params) {
int capacityCount = 0;
for (StorageNodeId snId : snPool.getList()) {
capacityCount += params.get(snId).getCapacity();
}
return capacityCount;
}
/**
* A Boolean predicate for rules problems.
*/
public interface RulesProblemFilter<T extends RulesProblem> {
/**
* Returns whether the problem matches the predicate.
*/
boolean match(T problem);
}
public static class Results {
private final List<RulesProblem> violations;
private final List<RulesProblem> warnings;
Results() {
violations = new ArrayList<>();
warnings = new ArrayList<>();
}
void add(RulesProblem p) {
if (p.isViolation()) {
violations.add(p);
} else {
warnings.add(p);
}
}
public List<RulesProblem> getViolations() {
return violations;
}
public List<RulesProblem> getWarnings() {
return warnings;
}
/**
* Return both violations and warnings.
*/
public List<RulesProblem> getProblems() {
List<RulesProblem> all = new ArrayList<>(violations);
all.addAll(warnings);
return all;
}
public int numProblems() {
return violations.size() + warnings.size();
}
public int numViolations() {
return violations.size();
}
/**
* Return a list of all of this kind of problem.
*/
public <T extends RulesProblem> List<T> find(Class<T> problemClass) {
List<T> p = new ArrayList<>();
for (RulesProblem rp : violations) {
if (rp.getClass().equals(problemClass)) {
p.add(problemClass.cast(rp));
}
}
for (RulesProblem rp : warnings) {
if (rp.getClass().equals(problemClass)) {
p.add(problemClass.cast(rp));
}
}
return p;
}
/**
* Return a list of the specified kind of problem that match the
* filter.
*/
public <T extends RulesProblem> List<T> find(
final Class<T> problemClass, final RulesProblemFilter<T> filter) {
final List<T> p = new ArrayList<>();
for (final RulesProblem rp : violations) {
if (rp.getClass().equals(problemClass)) {
final T crp = problemClass.cast(rp);
if (filter.match(crp)) {
p.add(crp);
}
}
}
for (final RulesProblem rp : warnings) {
if (rp.getClass().equals(problemClass)) {
final T crp = problemClass.cast(rp);
if (filter.match(crp)) {
p.add(problemClass.cast(rp));
}
}
}
return p;
}
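/*
* Illustrative usage sketch: collect only the OverCapacity violations that
* involve a particular SN. The getSNId() accessor and the targetSnId
* variable are assumptions made for this example.
*
*   List<OverCapacity> overCap = results.find(
*       OverCapacity.class,
*       new RulesProblemFilter<OverCapacity>() {
*           @Override
*           public boolean match(OverCapacity p) {
*               return p.getSNId().equals(targetSnId);
*           }
*       });
*/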
@Override
public String toString() {
if (numProblems() == 0) {
return "No problems";
}
StringBuilder sb = new StringBuilder();
int numViolations = violations.size();
if (numViolations > 0) {
sb.append(numViolations);
sb.append((numViolations == 1) ? " violation.\n" :
" violations.\n");
for (RulesProblem r : violations) {
sb.append(r).append("\n");
}
}
int numWarnings = warnings.size();
if (numWarnings > 0) {
sb.append(numWarnings);
sb.append((numWarnings == 1) ? " warning.\n" :
" warnings.\n");
for (RulesProblem r : warnings) {
sb.append(r).append("\n");
}
}
return sb.toString();
}
/**
* Find all the issues that are in this results class, but are
* not in "other".
*/
public Results remove(Results other) {
Results diff = new Results();
List<RulesProblem> pruned = new ArrayList<>(violations);
pruned.removeAll(other.violations);
diff.violations.addAll(pruned);
pruned = new ArrayList<>(warnings);
pruned.removeAll(other.warnings);
diff.warnings.addAll(pruned);
return diff;
}
}
}