org.kitesdk.minicluster.MiniCluster Maven / Gradle / Ivy
The newest version!
/**
* Copyright 2014 Cloudera Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.kitesdk.minicluster;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.conf.Configuration;
import org.kitesdk.data.spi.DefaultConfiguration;
import org.kitesdk.minicluster.Service.ServiceConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A in process MiniCluster implementation for Kite that is configurable with
* Services that make up the MiniCluster. Examples of Services are HDFS, HBase,
* Zookeeper, etc...
*
* This MiniCluster should be built with its internal Builder class, which will
* validate proper configuration of the MiniCluster. For example, a Service can
* have dependencies, so the builder will validate that proper Service
* dependencies have been added to the MiniCluster.
*/
public class MiniCluster {
private static final Logger logger = LoggerFactory
.getLogger(MiniCluster.class);
public static final String BIND_IP_KEY = "bind-ip";
public static final String CLEAN_KEY = "clean";
public static final String WORK_DIR_KEY = "directory";
public static final String NAMENODE_RPC_PORT = "hdfs-namenode-rpc-port";
public static final String ZK_PORT_KEY = "zk-port";
public static final String HIVE_METASTORE_PORT = "hive-metastore-port";
public static final String HIVE_SERVER_PORT = "hive-server-port";
public static final String FLUME_CONFIGURATION = "flume-configuration";
public static final String FLUME_AGENT_NAME = "flume-agent-name";
static final String RESOURCE_URI_SCHEME = "resource";
private static final Map registeredServices = new ConcurrentHashMap();
private final List services;
private final ServiceConfig serviceConfig;
public static void registerService(Class extends Service> klass) {
Service service;
try {
service = klass.getConstructor().newInstance();
} catch (Exception e) {
logger.error("Could not get default class constructor for: "
+ klass.getName());
throw new RuntimeException(e);
}
registeredServices.put(klass.getName(), service);
}
/**
* Private constructor. The MiniCluster should be built with the Builder
* class.
*
* @param services
* cluster services The services in run order
* @param serviceConfig
* The Hadoop Configuration to start running the mini cluster
* services with.
*/
private MiniCluster(List services, ServiceConfig serviceConfig) {
this.services = services;
this.serviceConfig = serviceConfig;
}
/**
* A MiniCluster Builder.
*/
public static class Builder {
private Configuration hadoopConf;
private List services = Lists.newArrayList();
private ServiceConfig serviceConfig = new ServiceConfig();
public Builder hadoopConf(Configuration hadoopConf) {
serviceConfig.setHadoopConf(hadoopConf);
return this;
}
public Builder workDir(String workDir) {
serviceConfig.set(WORK_DIR_KEY, workDir);
return this;
}
public Builder clean(boolean clean) {
serviceConfig.set(CLEAN_KEY, Boolean.toString(clean));
return this;
}
public Builder bindIP(String bindIP) {
serviceConfig.set(BIND_IP_KEY, bindIP);
return this;
}
public Builder namenodeRpcPort(int namenodeRpcPort) {
serviceConfig.set(NAMENODE_RPC_PORT, Integer.toString(namenodeRpcPort));
return this;
}
public Builder zkPort(int zkPort) {
serviceConfig.set(ZK_PORT_KEY, Integer.toString(zkPort));
return this;
}
public Builder hiveMetastorePort(int port) {
serviceConfig.set(HIVE_METASTORE_PORT, Integer.toString(port));
return this;
}
public Builder hiveServerPort(int port) {
serviceConfig.set(HIVE_SERVER_PORT, Integer.toString(port));
return this;
}
public Builder flumeConfiguration(String resource) {
serviceConfig.set(FLUME_CONFIGURATION, toUrl(resource).toExternalForm());
return this;
}
public Builder flumeAgentName(String name) {
serviceConfig.set(FLUME_AGENT_NAME, name);
return this;
}
private URL toUrl(String resource) {
URI resourceUri = URI.create(resource);
if (RESOURCE_URI_SCHEME.equals(resourceUri.getScheme())) {
// following throws IllegalArgumentException if resource isn't found
return Resources.getResource(resourceUri.getRawSchemeSpecificPart());
} else { // treat as file path
File file = new File(resource);
if (!file.exists()) {
throw new IllegalArgumentException(String.format("File %s not found.", file));
}
try {
return file.toURI().toURL();
} catch (MalformedURLException e) {
throw new IllegalArgumentException(e);
}
}
}
/**
* Service configs are dynamic config name-value pairs that can be
* interpreted by the services. This allows new services to be added that
* have their own encapsulated configuration parameters that don't need to
* be exposed statically from this Builder.
*
* @param name
* The name of the configuration parameter
* @param value
* The value of the config setting
* @return this Builder for method chaining.
*/
public Builder setServiceConfig(String name, String value) {
serviceConfig.set(name, value);
return this;
}
public Builder addService(Class extends Service> klass) {
Preconditions.checkState(!serviceImplExists(klass),
"A service implementation already exists for: " + klass.getName());
try {
Class.forName(klass.getName());
} catch (ClassNotFoundException e) {
// ignore
}
Service service = registeredServices.get(klass.getName());
Preconditions.checkState(service != null,
"Unknown service (maybe not registered): " + klass.getName());
services.add(service);
return this;
}
public MiniCluster build() {
Preconditions.checkState(serviceConfig.get(WORK_DIR_KEY) != null,
"Must provide a path on the local filesystem to store cluster data");
if (hadoopConf == null) {
hadoopConf = new Configuration();
}
// Make the services list in run order based on each service's deps
services = getServicesInRunOrder(services);
// Return the configured mini cluster
return new MiniCluster(services, serviceConfig);
}
/**
* Given a list of services, sort them in the proper order based on their
* run dependencies reported by the service's dependencies() method.
*
* Will throw an IllegalStateException if required dependencies for a
* service isn't in the list.
*
* @return The ordered Service list.
*/
private List getServicesInRunOrder(List services) {
List orderedServices = new ArrayList();
List serviceQueue = new ArrayList(services);
while (orderedServices.size() < services.size()) {
List serviceQueueIndexesToRemove = new ArrayList();
for (int i = 0; i < serviceQueue.size(); i++) {
Service service = serviceQueue.get(i);
boolean allDependenciesIn = true;
if (service.dependencies() != null) {
for (Class extends Service> serviceClass : service.dependencies()) {
if (!serviceImplExists(orderedServices, serviceClass)) {
allDependenciesIn = false;
break;
}
}
}
if (allDependenciesIn) {
serviceQueueIndexesToRemove.add(i);
orderedServices.add(service);
}
}
if (serviceQueueIndexesToRemove.size() == 0) {
// Indicates nothing to be moved. This means we have dependencies that
// have not been added. Throw an exception.
throw new IllegalStateException(
"Required service dependencies haven't been added as services.");
} else {
int numRemoved = 0;
for (int idx : serviceQueueIndexesToRemove) {
serviceQueue.remove(idx - numRemoved++);
}
}
}
return orderedServices;
}
@SuppressWarnings("unchecked")
private T getServiceImpl(List services,
Class klass) {
for (Service service : services) {
if (service.getClass() == klass) {
return (T) service;
}
}
return null;
}
private boolean serviceImplExists(Class extends Service> klass) {
return serviceImplExists(services, klass);
}
private boolean serviceImplExists(List services,
Class extends Service> klass) {
return getServiceImpl(services, klass) != null;
}
}
/**
* Starts the services in order, passing the previous service's modified
* Configuration object to the next.
*
* @throws IOException
*/
public void start() throws IOException, InterruptedException {
for (Service service : services) {
service.configure(serviceConfig);
logger.info("Running Minicluster Service: "
+ service.getClass().getName());
service.start();
serviceConfig.setHadoopConf(service.getHadoopConf());
// set the default configuration so that the minicluster is used
DefaultConfiguration.set(serviceConfig.getHadoopConf());
}
logger.info("All Minicluster Services running.");
}
/**
* Stops the services in reverse of their run order.
*
* @throws IOException
*/
public void stop() throws IOException, InterruptedException {
for (int i = services.size() - 1; i >= 0; i--) {
Service service = services.get(i);
logger.info("Stopping Minicluster Service: "
+ service.getClass().getName());
service.stop();
}
logger.info("All Minicluster Services stopped.");
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy