/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.hdfs.server.diskbalancer.datamodel;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import com.google.common.base.Preconditions;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.hdfs.server.diskbalancer.connectors.ClusterConnector;
import org.apache.hadoop.hdfs.server.diskbalancer.planner.NodePlan;
import org.apache.hadoop.hdfs.server.diskbalancer.planner.Planner;
import org.apache.hadoop.hdfs.server.diskbalancer.planner.PlannerFactory;
import org.apache.hadoop.hdfs.web.JsonUtil;
import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.Map;
import java.util.HashMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
/**
* DiskBalancerCluster represents the nodes that we are working against.
*
* Please Note :
*
* Semantics of inclusionList and exclusionList.
*
* If a non-empty inclusionList is specified, then the diskBalancer assumes
* that the user is only interested in processing that list of nodes. This
* node list is checked against the exclusionList, and only the nodes that are
* in the inclusionList but not in the exclusionList are processed.
*
* If the inclusionList is empty, then we assume that all live nodes in the
* cluster are to be processed by the diskBalancer. In that case the
* diskBalancer will skip any nodes specified in the exclusionList but will
* process all other nodes in the cluster.
*
* In other words, an empty inclusionList means all nodes; otherwise only the
* given list is processed, and the exclusionList is always honored.
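*
* A minimal illustration of these semantics (the connector and the host
* names dn1/dn2 are hypothetical):
* <pre>{@code
* DiskBalancerCluster cluster = new DiskBalancerCluster(connector);
* cluster.readClusterInfo();
* cluster.setInclusionList(new TreeSet<>(Arrays.asList("dn1", "dn2")));
* cluster.setExclusionList(new TreeSet<>(Collections.singleton("dn2")));
* // Per the semantics above, dn1 is processed and dn2 is skipped.
* }</pre>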
*/
@JsonIgnoreProperties(ignoreUnknown = true)
public class DiskBalancerCluster {
private static final Logger LOG =
LoggerFactory.getLogger(DiskBalancerCluster.class);
private static final ObjectReader READER =
new ObjectMapper().readerFor(DiskBalancerCluster.class);
private final Set<String> exclusionList;
private final Set<String> inclusionList;
private ClusterConnector clusterConnector;
private List<DiskBalancerDataNode> nodes;
private String outputpath;
@JsonIgnore
private List<DiskBalancerDataNode> nodesToProcess;
@JsonIgnore
private final Map<String, DiskBalancerDataNode> ipList;
@JsonIgnore
private final Map<String, DiskBalancerDataNode> hostNames;
@JsonIgnore
private final Map<String, DiskBalancerDataNode> hostUUID;
private float threshold;
/**
* Empty Constructor needed by Jackson.
*/
public DiskBalancerCluster() {
nodes = new LinkedList<>();
exclusionList = new TreeSet<>();
inclusionList = new TreeSet<>();
ipList = new HashMap<>();
hostNames = new HashMap<>();
hostUUID = new HashMap<>();
}
/**
* Constructs a DiskBalancerCluster.
*
* @param connector - ClusterConnector
* @throws IOException
*/
public DiskBalancerCluster(ClusterConnector connector) throws IOException {
this();
Preconditions.checkNotNull(connector);
clusterConnector = connector;
}
/**
* Parses a JSON string and converts it to a DiskBalancerCluster.
*
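* A round-trip sketch using this class's own serializer:
* <pre>{@code
* String json = cluster.toJson();
* DiskBalancerCluster copy = DiskBalancerCluster.parseJson(json);
* }</pre>
*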
* @param json - Json String
* @return DiskBalancerCluster
* @throws IOException
*/
public static DiskBalancerCluster parseJson(String json) throws IOException {
return READER.readValue(json);
}
/**
* readClusterInfo connects to the cluster and reads each node's data. This
* data is used as the basis for the rest of the computation in
* DiskBalancerCluster.
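*
* A typical call sequence (connector construction is elided):
* <pre>{@code
* DiskBalancerCluster cluster = new DiskBalancerCluster(connector);
* cluster.readClusterInfo();
* List<DiskBalancerDataNode> nodes = cluster.getNodes();
* }</pre>
* @throws Exception if reading node data from the cluster fails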
*/
public void readClusterInfo() throws Exception {
Preconditions.checkNotNull(clusterConnector);
LOG.debug("Using connector : {}", clusterConnector.getConnectorInfo());
nodes = clusterConnector.getNodes();
for (DiskBalancerDataNode node : nodes) {
if (node.getDataNodeIP() != null && !node.getDataNodeIP().isEmpty()) {
ipList.put(node.getDataNodeIP(), node);
}
if (node.getDataNodeName() != null && !node.getDataNodeName().isEmpty()) {
// TODO : should we support Internationalized Domain Names ?
// Disk balancer assumes that host names are ASCII. If not, the end
// user can always balance the node via its IP address or DataNode UUID.
hostNames.put(node.getDataNodeName().toLowerCase(Locale.US), node);
}
if (node.getDataNodeUUID() != null && !node.getDataNodeUUID().isEmpty()) {
hostUUID.put(node.getDataNodeUUID(), node);
}
}
}
/**
* Gets all DataNodes in the Cluster.
*
* @return List of DiskBalancerDataNodes
*/
public List<DiskBalancerDataNode> getNodes() {
return nodes;
}
/**
* Sets the list of nodes of this cluster.
*
* @param clusterNodes List of Nodes
*/
public void setNodes(List<DiskBalancerDataNode> clusterNodes) {
this.nodes = clusterNodes;
}
/**
* Returns the current ExclusionList.
*
* @return List of Nodes that are excluded from diskBalancer right now.
*/
public Set<String> getExclusionList() {
return exclusionList;
}
/**
* Sets the list of nodes to exclude from the diskBalancer process.
*
* @param excludedNodes - exclusionList of nodes.
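* Note: the supplied nodes are added to the existing exclusionList via
* addAll rather than replacing it.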
*/
public void setExclusionList(Set<String> excludedNodes) {
this.exclusionList.addAll(excludedNodes);
}
/**
* Returns the threshold value. This is used to indicate how much skew is
* acceptable. It is expressed as a percentage; for example, to say that 20%
* skew between volumes is acceptable, set this value to 20.
*
* @return float
*/
public float getThreshold() {
return threshold;
}
/**
* Sets the threshold value.
*
* @param thresholdPercent - float - in percentage
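* @throws IllegalStateException if the value is outside the range 0 to 100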
*/
public void setThreshold(float thresholdPercent) {
Preconditions.checkState((thresholdPercent >= 0.0f) &&
(thresholdPercent <= 100.0f), "A percentage value expected.");
this.threshold = thresholdPercent;
}
/**
* Gets the Inclusion list.
*
* @return List of machine to be processed by diskBalancer.
*/
public Set<String> getInclusionList() {
return inclusionList;
}
/**
* Sets the inclusionList.
*
* @param includeNodes - set of machines to be processed by diskBalancer.
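* Note: like setExclusionList, this adds to the existing inclusionList via
* addAll rather than replacing it.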
*/
public void setInclusionList(Set<String> includeNodes) {
this.inclusionList.addAll(includeNodes);
}
/**
* Returns a serialized JSON string.
*
* @return String - json
* @throws IOException
*/
public String toJson() throws IOException {
return JsonUtil.toJsonString(this);
}
/**
* Returns the nodes to process, which is the actual list of nodes processed
* by the diskBalancer.
*
* @return List of DiskBalancerDataNodes
*/
@JsonIgnore
public List<DiskBalancerDataNode> getNodesToProcess() {
return nodesToProcess;
}
/**
* Sets the nodes to process.
*
* @param dnNodesToProcess - List of DataNodes to process
*/
@JsonIgnore
public void setNodesToProcess(List<DiskBalancerDataNode> dnNodesToProcess) {
this.nodesToProcess = dnNodesToProcess;
}
/**
* Returns the output path for this cluster.
*/
public String getOutput() {
return outputpath;
}
/**
* Sets the output path for this run.
*
* @param output - Path
*/
public void setOutput(String output) {
this.outputpath = output;
}
/**
* Writes a snapshot of the cluster to the output directory.
*
* @param snapShotName - name of the snapshot
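*
* A usage sketch (the output path is illustrative):
* <pre>{@code
* cluster.setOutput("/tmp/diskbalancer");
* cluster.createSnapshot("before.json");
* // writes /tmp/diskbalancer/before.json
* }</pre>
* @throws IOException if the snapshot file cannot be written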
*/
public void createSnapshot(String snapShotName) throws IOException {
String json = this.toJson();
File outFile = new File(getOutput() + "/" + snapShotName);
FileUtils.writeStringToFile(outFile, json, StandardCharsets.UTF_8);
}
/**
* computePlan constructs a planner for each node to be processed and creates
* a plan that we would like to follow.
*
* This function creates a thread pool and executes a planner on each node
* that we are supposed to plan for. Each of these planners returns a NodePlan
* that we can persist or schedule for execution with a diskBalancer
* Executor.
*
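* A usage sketch (assumes readClusterInfo() has completed):
* <pre>{@code
* cluster.setNodesToProcess(cluster.getNodes());
* List<NodePlan> plans = cluster.computePlan(10.0);
* }</pre>
*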
* @param thresholdPercent - in percentage
* @return list of NodePlans
*/
public List<NodePlan> computePlan(double thresholdPercent) {
List<NodePlan> planList = new LinkedList<>();
if (nodesToProcess == null) {
LOG.warn("Nodes to process is null. No nodes processed.");
return planList;
}
int poolSize = computePoolSize(nodesToProcess.size());
ExecutorService executorService = Executors.newFixedThreadPool(poolSize);
List<Future<NodePlan>> futureList = new LinkedList<>();
for (int x = 0; x < nodesToProcess.size(); x++) {
final DiskBalancerDataNode node = nodesToProcess.get(x);
final Planner planner = PlannerFactory
.getPlanner(PlannerFactory.GREEDY_PLANNER, node,
thresholdPercent);
futureList.add(executorService.submit(new Callable<NodePlan>() {
@Override
public NodePlan call() throws Exception {
assert planner != null;
return planner.plan(node);
}
}));
}
for (Future<NodePlan> f : futureList) {
try {
planList.add(f.get());
} catch (InterruptedException e) {
LOG.error("Compute Node plan was cancelled or interrupted : ", e);
} catch (ExecutionException e) {
LOG.error("Unable to compute plan : ", e);
}
}
return planList;
}
/**
* Return the number of threads we should launch for this cluster.
*
* Here is the heuristic we are using:
*
* For fewer than 10 nodes, use one thread per node. Otherwise, use roughly
* one thread per 100 nodes, rounding the pool size up to the next multiple
* of 10, with a maximum of 100 threads in the pool.
*
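* Worked examples: 7 nodes -> 7 threads; 500 nodes -> threadRatio 5, rounded
* up to 10 threads; 1200 nodes -> threadRatio 12, rounded up to 20 threads;
* 20000 nodes -> capped at 100 threads.
*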
* @return number
*/
private int computePoolSize(int nodeCount) {
if (nodeCount < 10) {
return nodeCount;
}
int threadRatio = nodeCount / 100;
int modValue = threadRatio % 10;
if (((10 - modValue) + threadRatio) > 100) {
return 100;
} else {
return (10 - modValue) + threadRatio;
}
}
/**
* Returns a node by UUID.
* @param uuid - Node's UUID
* @return DiskBalancerDataNode.
*/
public DiskBalancerDataNode getNodeByUUID(String uuid) {
return hostUUID.get(uuid);
}
/**
* Returns a node by IP Address.
* @param ipAddress - IP address String.
* @return DiskBalancerDataNode.
*/
public DiskBalancerDataNode getNodeByIPAddress(String ipAddress) {
return ipList.get(ipAddress);
}
/**
* Returns a node by hostName.
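* Note: readClusterInfo stores host names lower-cased with Locale.US, so
* callers should pass a lower-case host name for the lookup to match.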
* @param hostName - HostName.
* @return DiskBalancerDataNode.
*/
public DiskBalancerDataNode getNodeByName(String hostName) {
return hostNames.get(hostName);
}
}