Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it as often as you want.
com.alibaba.schedulerx.worker.SchedulerxWorker Maven / Gradle / Ivy
package com.alibaba.schedulerx.worker;
import java.io.File;
import java.io.IOException;
import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import javax.net.ssl.SSLContext;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLContexts;
import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.jboss.netty.channel.socket.nio.Boss;
import org.joda.time.DateTime;
import org.json.JSONException;
import org.springframework.aop.support.AopUtils;
import org.springframework.beans.BeansException;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.context.ApplicationContext;
import org.springframework.context.ApplicationContextAware;
import org.springframework.context.ApplicationListener;
import org.springframework.context.event.ApplicationContextEvent;
import org.springframework.context.event.ContextClosedEvent;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.context.event.ContextStartedEvent;
import org.springframework.context.event.ContextStoppedEvent;
import com.alibaba.schedulerx.common.constants.CommonConstants;
import com.alibaba.schedulerx.common.domain.Metrics;
import com.alibaba.schedulerx.common.domain.enums.AppType;
import com.alibaba.schedulerx.common.monitor.MetricsCollector;
import com.alibaba.schedulerx.common.util.ConfigUtil;
import com.alibaba.schedulerx.common.util.IpUtil;
import com.alibaba.schedulerx.common.util.JsonUtil;
import com.alibaba.schedulerx.common.util.ReflectionUtil;
import com.alibaba.schedulerx.protocol.Worker.ContainerReportTaskStatusRequest;
import com.alibaba.schedulerx.protocol.Worker.WorkerHeartBeatRequest;
import com.alibaba.schedulerx.protocol.Worker.WorkerHeartBeatResponse;
import com.alibaba.schedulerx.protocol.Worker.WorkerOfflineRequest;
import com.alibaba.schedulerx.protocol.utils.FutureUtils;
import com.alibaba.schedulerx.worker.actor.ContainerActor;
import com.alibaba.schedulerx.worker.actor.JobInstanceActor;
import com.alibaba.schedulerx.worker.actor.LogActor;
import com.alibaba.schedulerx.worker.actor.TaskRouter;
import com.alibaba.schedulerx.worker.actor.WorkerHeartbeatActor;
import com.alibaba.schedulerx.worker.batch.ContainerStatusReqHandler;
import com.alibaba.schedulerx.worker.batch.ContainerStatusReqHandlerPool;
import com.alibaba.schedulerx.worker.container.ContainerFactory;
import com.alibaba.schedulerx.worker.container.ContainerPool;
import com.alibaba.schedulerx.worker.container.ShutdownMode;
import com.alibaba.schedulerx.worker.container.ThreadContainerPool;
import com.alibaba.schedulerx.worker.discovery.ArmoryResult;
import com.alibaba.schedulerx.worker.discovery.DefaultGroupDiscovery;
import com.alibaba.schedulerx.worker.discovery.GroupDiscovery;
import com.alibaba.schedulerx.worker.discovery.GroupManager;
import com.alibaba.schedulerx.worker.discovery.ServerDiscovery;
import com.alibaba.schedulerx.worker.discovery.ServerDiscoveryFactory;
import com.alibaba.schedulerx.worker.domain.WorkerConstants;
import com.alibaba.schedulerx.worker.exception.DomainInvalidException;
import com.alibaba.schedulerx.worker.exception.DomainNotFoundException;
import com.alibaba.schedulerx.worker.exception.NamespaceNotFoundException;
import com.alibaba.schedulerx.worker.ha.AtLeastOnceDeliveryRoutingActor;
import com.alibaba.schedulerx.worker.ha.HealthTimeHolder;
import com.alibaba.schedulerx.worker.log.LogFactory;
import com.alibaba.schedulerx.worker.log.Logger;
import com.alibaba.schedulerx.worker.log.appender.LogConfig;
import com.alibaba.schedulerx.worker.logcollector.LogCleaner;
import com.alibaba.schedulerx.worker.logcollector.LogCollector;
import com.alibaba.schedulerx.worker.logcollector.LogCollectorFactory;
import com.alibaba.schedulerx.worker.master.TaskMaster;
import com.alibaba.schedulerx.worker.master.TaskMasterPool;
import com.alibaba.schedulerx.worker.master.persistence.H2FilePersistence;
import com.alibaba.schedulerx.worker.metrics.CgroupMetrics;
import com.alibaba.schedulerx.worker.processor.JobProcessor;
import com.alibaba.schedulerx.worker.pull.PullManager;
import com.alibaba.schedulerx.worker.security.Authenticator;
import com.alibaba.schedulerx.worker.security.DefaultAuthenticator;
import com.alibaba.schedulerx.worker.service.WorkerHttpServer;
import com.alibaba.schedulerx.worker.timer.AbstractTimerTask;
import com.alibaba.schedulerx.worker.util.ConsoleUtil;
import com.alibaba.schedulerx.worker.util.DiamondUtil;
import com.alibaba.schedulerx.worker.util.FileUtils;
import com.alibaba.schedulerx.worker.util.SpringContext;
import com.alibaba.schedulerx.worker.util.WorkerIdGenerator;
import com.google.common.collect.Lists;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.protobuf.ProtocolMessageEnum;
import com.mashape.unirest.http.HttpResponse;
import com.mashape.unirest.http.JsonNode;
import com.mashape.unirest.http.Unirest;
import com.typesafe.config.Config;
import akka.actor.ActorRef;
import akka.actor.ActorSelection;
import akka.actor.ActorSystem;
import akka.actor.Address;
import akka.actor.Props;
import akka.actor.Terminated;
import akka.dispatch.OnComplete;
import akka.routing.RoundRobinPool;
import javassist.compiler.JvstCodeGen;
import scala.Function;
/**
* @author xiaomeng.hxm
*/
public class SchedulerxWorker implements ApplicationContextAware, InitializingBean,
ApplicationListener {
// Logger for this class.
private static final Logger LOGGER = LogFactory.getLogger(SchedulerxWorker.class);
// Actor system used by the worker (e.g. for actorSelection during shutdown); starts as null.
public static ActorSystem actorSystem = null;
// At-least-once-delivery routing actor; assigned in initActors.
public static ActorRef AtLeastDeliveryRoutingActor = null;
// Set to true at the end of a successful init() and back to false by shutdown().
public static volatile boolean INITED = false;
// Class loader handed to ReflectionUtil when instantiating pluggable classes by name.
public static ClassLoader CUSTOMER_CLASS_LOADER = null;
// Address of this worker; combined with the worker id as "<id>@<addr>" in offline requests.
public static String WORKER_ADDR = null;
private static GroupManager groupManager = GroupManager.INSTANCE;
// Single-threaded scheduler named "Schedulerx-heartbeat-thread"; rejected tasks are silently discarded.
private static ScheduledExecutorService heartbeatSes = new ScheduledThreadPoolExecutor(1,
new ThreadFactoryBuilder().setNameFormat("Schedulerx-heartbeat-thread").build(),
new ThreadPoolExecutor.DiscardPolicy());
// NOTE(review): not referenced in the visible part of this file; presumably a heartbeat timeout counter.
private static int HEART_BEAT_TIMEOUT_TIMES = 0;
// Config keys left out when init() logs/prints the worker configuration.
// NOTE(review): raw List here and below; the element type arguments appear to have been lost.
private static List EXCLUDE_KEYS = Lists.newArrayList("sls.ak","sls.sk","sls.aksk.encoded","worker.timer.tasks","appKey");
// Config keys included in the exception message when init() fails and startup is blocked.
private static List INCLUDE_KEYS = Lists.newArrayList("address.server.domain","domainName","schedulerx.namespace",
"groupId");
// Dedicated logger named "heartbeat".
private static final Logger heatbeatLogger = LogFactory.getLogger("heartbeat");
// Worker id returned by startActorSystem() during init().
private volatile static String WORKER_ID = null;
// NOTE(review): not referenced in the visible part of this file.
private static volatile boolean forceActorSystemTerminate = false;
/**
 * Installs a custom HTTP client into Unirest that skips SSL verification:
 * self-signed certificates are trusted and any host name is accepted.
 * Timeouts are 5s connect, 5s socket and 15s for obtaining a pooled connection.
 * Failures are logged and otherwise ignored.
 */
private void initUnirest() {
    try {
        RequestConfig requestConfig = RequestConfig.custom()
            .setConnectTimeout(5000)
            .setSocketTimeout(5000)
            .setConnectionRequestTimeout(15000)
            .build();
        SSLContext trustSelfSigned = SSLContexts.custom()
            .loadTrustMaterial(null, new TrustSelfSignedStrategy())
            .build();
        SSLConnectionSocketFactory socketFactory = new SSLConnectionSocketFactory(
            trustSelfSigned, SSLConnectionSocketFactory.ALLOW_ALL_HOSTNAME_VERIFIER);
        CloseableHttpClient client = HttpClients.custom()
            .setDefaultRequestConfig(requestConfig)
            .setSSLSocketFactory(socketFactory)
            .disableCookieManagement()
            .build();
        Unirest.setHttpClient(client);
    } catch (Exception e) {
        LOGGER.error("Init Unirest Failed.", e);
    }
}
/**
 * Starts the worker. The whole body runs under the SchedulerxWorker class lock.
 * <p>
 * Unirest is (re)configured on every call. If the worker is already INITED the call is
 * delegated to {@link #appendWorkerInit()} and returns. Otherwise the startup steps below run
 * in order; INITED is set to true only after all of them succeed.
 * <p>
 * On any Throwable the error is logged and the actor system is terminated; the failure is
 * rethrown as an IOException only when the BLOCK_APP_START config flag is true. The finally
 * block always logs/prints the configuration and registers a JVM shutdown hook, including on
 * the early return taken when this machine is isolated.
 *
 * @throws Exception when startup fails and BLOCK_APP_START is enabled, or when
 *                   appendWorkerInit() fails
 */
public void init() throws Exception {
synchronized (SchedulerxWorker.class) {
// Initialize Unirest
initUnirest();
if (INITED) {
appendWorkerInit();
return;
}
LOGGER.info("Schedulerx Worker starting...");
printMvnDenpendency();
Configuration conf = ConfigUtil.getWorkerConfig();
try {
initMetaInfoFromSystem(conf);
// Resolve the console domain name
String domainName = initConsoleDomain();
if (StringUtils.isBlank(domainName)) {
throw new DomainNotFoundException("Not found domainName.");
}
// Determine this machine's host
String host = conf.getString(WorkerConstants.HOSTNAME);
if (host == null) {
host = getLocalHost();
}
// If this machine must be isolated, do not initialize
if (isolateMachine(host)) {
return;
}
// Resolve the group ids and app keys
// NOTE(review): List/Map/Entry below carry no type arguments; they appear to have been lost - confirm.
List groupIdList = getGroupIds(conf);
List appKeyList = getAppKeys(conf);
// Initialize the namespace
String namespace = initNamespace();
String namespaceSource = initNamespaceSource();
groupManager.init(namespace, namespaceSource, groupIdList, appKeyList);
// Initialize the label
initCurLabel();
// Validate parameters
checkParameters(conf, namespace);
// Fetch environment info from the console
initMetaInfoFromConsole(namespace, namespaceSource, groupIdList, appKeyList);
// Initialize STS parameters
initStsKey();
// Authenticate
authenticate(conf, namespace, namespaceSource, groupIdList, appKeyList);
// Initialize the actorSystem and actors
WORKER_ID = startActorSystem();
// Initialize the H2 file db
if (conf.getBoolean(WorkerConstants.BATCH_WORK_ENABLE, WorkerConstants.BATCH_WORK_ENABLE_DEFAULT)) {
LOGGER.info("H2FilePersistence initing...");
initStore();
LOGGER.info("H2FilePersistence inited.");
}
initServerDiscovery(groupIdList);
LOGGER.info("ServerDiscovery inited.");
LOGGER.info("LogCollector initing...");
Map groupIdMap = GroupManager.INSTANCE.getGroupId2AppGroupIdMap();
for (Entry entry : groupIdMap.entrySet()) {
initLogCollector(entry.getValue(), WORKER_ADDR, entry.getKey());
}
LOGGER.info("LogCollector inited...");
if (conf.getBoolean(WorkerConstants.CGROUP_MERTRICS_ENABLE, WorkerConstants.CGROUP_MERTRICS_ENABLE_DEFAULT)) {
CgroupMetrics.getInstance();
LOGGER.info("cgroup metrics inited.");
}
initTimerTask(conf);
LOGGER.info("timer task inited.");
if (conf.getBoolean(WorkerConstants.LOG_COLLECTOR_ENABLE, WorkerConstants.LOG_COLLECTOR_ENABLE_DEFAULT)) {
LogCleaner logCleaner = LogCollectorFactory.newCleaner();
logCleaner.init();
}
// Heartbeat is started here only when no SchedulerxWorker bean is found in a Spring context.
if (SpringContext.context == null || ArrayUtils.isEmpty(SpringContext.context.getBeanNamesForType(SchedulerxWorker.class))) {
initHeartBeat(WORKER_ID);
LOGGER.info("heartbeat init.");
}
// Initialize the http service
Boolean enableHttpServer = ConfigUtil.getWorkerConfig().getBoolean(WorkerConstants.HTTP_SERVER_ENABLE, false);
if (enableHttpServer) {
new WorkerHttpServer();
LOGGER.info("Schedulerx Worker http server init.");
}
LOGGER.info("Schedulerx Worker started.");
INITED = true;
} catch (Throwable t) {
LOGGER.error("Schedulerx Worker error", t);
terminateActorSystem();
// Only fail the hosting application's startup when explicitly configured to do so.
if (ConfigUtil.getWorkerConfig().getBoolean(WorkerConstants.BLOCK_APP_START, WorkerConstants.BLOCK_APP_START_DEFAULT)) {
throw new IOException(("Schedulerx WorkerConfig" + ConfigUtil.toStringInclude(ConfigUtil.getWorkerConfig(), INCLUDE_KEYS)), t);
}
} finally {
LOGGER.info("Schedulerx WorkerConfig" + ConfigUtil.toStringExclude(ConfigUtil.getWorkerConfig(), EXCLUDE_KEYS));
System.out.println("Schedulerx WorkerConfig=" + ConfigUtil.toStringExclude(ConfigUtil.getWorkerConfig(), EXCLUDE_KEYS));
/**
* Register a JVM shutdown hook that shuts the worker down with the default mode.
* NOTE(review): a new hook is added each time this block runs; shutdown() ignores
* calls made while INITED is false.
*/
Runtime.getRuntime().addShutdownHook(new Thread() {
@Override
public void run() {
shutdown(null);
}
});
}
}
}
/**
* Worker角色offline
*/
private static void workerRoleOffline(ShutdownMode shutdownMode) throws InterruptedException {
// 通知Master节点下线对应worker信息
ContainerPool containerPool = ContainerFactory.getContainerPool();
Map instanceMasterActorPathMap = containerPool.getInstanceMasterActorPathMap();
if (MapUtils.isNotEmpty(instanceMasterActorPathMap)) {
for (Map.Entry entry:instanceMasterActorPathMap.entrySet()) {
try {
String masterHeartbeatAkkaPath = entry.getValue().replace(WorkerConstants.WORKER_AKKA_TASK_ROUTING_PATH, WorkerConstants.WORKER_AKKA_HEARTBEAT_ROUTING_PATH);
ActorSelection masterActorSelection = SchedulerxWorker.actorSystem.actorSelection(masterHeartbeatAkkaPath);
String workerIdAddr = WorkerIdGenerator.get() + "@" + WORKER_ADDR;
WorkerOfflineRequest workerOfflineRequest = WorkerOfflineRequest.newBuilder()
.setJobInstanceId(entry.getKey())
.setShutdown(false)
.setWorkerIdAddr(workerIdAddr)
.build();
FutureUtils.awaitResult(masterActorSelection, workerOfflineRequest, 3);
} catch (Exception e) {
LOGGER.error("WorkerOfflineRequest send failed. masterHeartbeatAkkaPath={}, WORKER_ADDR={} ", entry.getValue(), WORKER_ADDR, e);
}
}
}
// 稳定3S,让在途中请求到达各自状态
TimeUnit.SECONDS.sleep(3);
// 执行线程池开始下线
ThreadContainerPool.getInstance().shutdown(shutdownMode);
// 拉模式任务执行下线 TODO 完善优雅下线
PullManager.INSTANCE.stopAll();
//等待任务执行状态反馈结束
Map> statusReqHandlerMap = ContainerStatusReqHandlerPool.INSTANCE.getHandlers();
if (MapUtils.isNotEmpty(statusReqHandlerMap)) {
for (Map.Entry> entry:statusReqHandlerMap.entrySet()) {
ContainerStatusReqHandler statusReqHandler = entry.getValue();
statusReqHandler.stop(false);
}
}
}
/**
 * Takes the master role offline by terminating every task master in the pool.
 * A failure on one task master is logged (with its cause) and does not stop the others.
 *
 * @param shutdownMode mode passed to each task master's terminate call
 */
private static void masterRoleOffline(ShutdownMode shutdownMode) {
    for (TaskMaster taskMaster : TaskMasterPool.INSTANCE.getAllTaskMaster()) {
        try {
            taskMaster.terminate(shutdownMode);
        } catch (Throwable t) {
            // Keep the cause in the log; previously the throwable was dropped.
            LOGGER.warn("TaskMaster jobInstanceId=" + taskMaster.getJobInstanceInfo() + " shutdown failed.", t);
        }
    }
}
/**
 * Waits for one shutdown stage to finish.
 *
 * @param shutdownTimeout whether a time budget applies
 * @param future          the stage being waited on
 * @param remainTime      remaining budget in seconds (used only when shutdownTimeout is true)
 * @param preTime         epoch seconds at which the stage started
 * @return the budget left after this stage when shutdownTimeout is true;
 *         otherwise remainTime unchanged
 * @throws TimeoutException if the stage does not finish within remainTime seconds
 */
private static long futureWait(boolean shutdownTimeout, Future future, long remainTime, long preTime)
        throws ExecutionException, InterruptedException, TimeoutException {
    if (!shutdownTimeout) {
        // No budget configured: block until the stage completes.
        future.get();
        return remainTime;
    }
    future.get(remainTime, TimeUnit.SECONDS);
    long nowSeconds = DateTime.now().getMillis() / 1000;
    long elapsedSeconds = nowSeconds - preTime;
    return remainTime - elapsedSeconds;
}
/**
 * Reads the default shutdown mode from the worker configuration.
 * The GRACE_SHUTDOWN_MODE string (upper-cased, matched by enum name) wins when present;
 * otherwise the integer WORKER_SHUTDOWN_MODE setting is parsed.
 *
 * @return the configured mode, or {@code ShutdownMode.IMMEDIATE} if reading or parsing fails
 */
private static ShutdownMode getDefaultShutdownMode() {
    try {
        String configuredName = ConfigUtil.getWorkerConfig()
            .getString(WorkerConstants.GRACE_SHUTDOWN_MODE, null);
        if (configuredName == null) {
            // Fall back to the numeric setting.
            return ShutdownMode.parseValue(
                ConfigUtil.getWorkerConfig().getInteger(
                    WorkerConstants.WORKER_SHUTDOWN_MODE,
                    WorkerConstants.WORKER_SHUTDOWN_MODE_DEFAULT));
        }
        return ShutdownMode.valueOf(StringUtils.upperCase(configuredName));
    } catch (Throwable t) {
        LOGGER.warn("Get default shutdown config failed.", t);
        return ShutdownMode.IMMEDIATE;
    }
}
/**
 * Shuts the worker down.
 * <p>
 * Does nothing if the worker is not INITED. Otherwise it marks the worker as not INITED,
 * sends an offline heartbeat, and (unless the mode is IMMEDIATE) runs two graceful stages on
 * a single background thread: worker-role offline, then master-role offline. When
 * GRACE_SHUTDOWN_TIMEOUT is positive it is one shared budget (seconds) across both stages.
 * <p>
 * The finally block always runs, also for IMMEDIATE mode and after a timeout: it sends a
 * final offline request with the shutdown flag set to every known master, force-kills any
 * remaining task masters, and terminates the actor system.
 *
 * @param shutdownMode requested mode; {@code null} means "use the configured default"
 */
public static synchronized void shutdown(ShutdownMode shutdownMode) {
    LOGGER.info("schedulerx worker shutdown...");
    System.out.println("schedulerx worker shutdown...");
    if (!INITED) {
        LOGGER.info("worker shutdown ignore, worker not init.");
        return;
    }
    INITED = false;
    ExecutorService offlineExecutor = Executors.newSingleThreadExecutor();
    try {
        // Push the worker-offline heartbeat.
        sendHeartBeat(WorkerIdGenerator.get(), false);
        // With no explicit mode, read the configured default.
        final ShutdownMode finalShutdownMode = (shutdownMode == null) ? getDefaultShutdownMode() : shutdownMode;
        if (finalShutdownMode == null || finalShutdownMode.equals(ShutdownMode.IMMEDIATE)) {
            // Exit immediately; the finally block still performs the forced cleanup.
            return;
        }
        // Time budget for the graceful stages.
        long startTime = DateTime.now().getMillis() / 1000;
        long remainTime = ConfigUtil.getWorkerConfig().getLong(WorkerConstants.GRACE_SHUTDOWN_TIMEOUT, WorkerConstants.GRACE_SHUTDOWN_TIMEOUT_DEFAULT);
        boolean shutdownTimeout = remainTime > 0;
        // Step 1: take the worker role offline.
        Future<?> future = offlineExecutor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    workerRoleOffline(finalShutdownMode);
                } catch (Throwable t) {
                    LOGGER.error("do worker role offline failed.", t);
                }
            }
        });
        // Carry the remaining budget forward; previously the returned value was discarded,
        // so each stage was allowed the full timeout.
        remainTime = futureWait(shutdownTimeout, future, remainTime, startTime);
        // Step 2: take the master role offline.
        startTime = DateTime.now().getMillis() / 1000;
        future = offlineExecutor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    masterRoleOffline(finalShutdownMode);
                } catch (Throwable t) {
                    LOGGER.error("do master role offline failed.", t);
                }
            }
        });
        futureWait(shutdownTimeout, future, remainTime, startTime);
        // Wait 1 second for a safe shutdown.
        TimeUnit.SECONDS.sleep(1);
    } catch (Exception e) {
        LOGGER.warn("Worker shutdown failed.", e);
    } finally {
        // Stop the offline worker thread.
        offlineExecutor.shutdownNow();
        // As a worker, send the final shutdown notice to every master.
        ContainerPool containerPool = ContainerFactory.getContainerPool();
        // NOTE(review): Long/String type arguments are inferred from usage below - confirm against ContainerPool.
        Map<Long, String> instanceMasterActorPathMap = containerPool.getInstanceMasterActorPathMap();
        if (MapUtils.isNotEmpty(instanceMasterActorPathMap)) {
            for (Map.Entry<Long, String> entry : instanceMasterActorPathMap.entrySet()) {
                try {
                    String masterHeartbeatAkkaPath = entry.getValue().replace(
                        WorkerConstants.WORKER_AKKA_TASK_ROUTING_PATH,
                        WorkerConstants.WORKER_AKKA_HEARTBEAT_ROUTING_PATH);
                    ActorSelection masterActorSelection =
                        SchedulerxWorker.actorSystem.actorSelection(masterHeartbeatAkkaPath);
                    String workerIdAddr = WorkerIdGenerator.get() + "@" + WORKER_ADDR;
                    WorkerOfflineRequest workerOfflineRequest = WorkerOfflineRequest.newBuilder()
                        .setJobInstanceId(entry.getKey())
                        .setShutdown(true)
                        .setWorkerIdAddr(workerIdAddr)
                        .build();
                    FutureUtils.awaitResult(masterActorSelection, workerOfflineRequest, 3);
                } catch (Exception e) {
                    LOGGER.error("WorkerOfflineRequest send failed. masterHeartbeatAkkaPath={}, WORKER_ADDR={} ", entry.getValue(), WORKER_ADDR, e);
                }
            }
        }
        // Force-stop any masters that are still present.
        for (TaskMaster taskMaster : TaskMasterPool.INSTANCE.getAllTaskMaster()) {
            try {
                taskMaster.killInstance(true, "Worker master shutdown.");
            } catch (Throwable t) {
                // Keep the cause in the log; previously the throwable was dropped.
                LOGGER.warn("TaskMaster jobInstanceId=" + taskMaster.getJobInstanceInfo() + " shutdown failed.", t);
            }
        }
        terminateActorSystem();
        LOGGER.info("schedulerx worker shutdown finished.");
        System.out.println("schedulerx worker finished.");
    }
}
/**
 * Called by init() when the worker is already INITED, i.e. for an additional
 * SchedulerxWorker instance. Validates the group-discovery setup and then collects
 * the group ids / app keys of this instance.
 * NOTE(review): the rest of this method is missing from this copy of the file (see below).
 */
public void appendWorkerInit() throws Exception {
Configuration conf = ConfigUtil.getWorkerConfig();
// Check setups configured through system properties, so that application groups are not
// missed when several SchedulerxWorker instances are initialized.
String groupDiscoveryClassName = conf.getString(WorkerConstants.WORKER_GROUP_DISCOVERY, DefaultGroupDiscovery.class.getName());
GroupDiscovery groupDiscovery = ReflectionUtil.getInstanceByClassName(groupDiscoveryClassName, SchedulerxWorker.CUSTOMER_CLASS_LOADER);
if (groupDiscovery != null && groupDiscovery.isSystemProperty()) {
// NOTE(review): with `||` the right operand is evaluated only when context is null (-> NPE),
// and any non-null context throws regardless of bean count. `&&` looks intended - confirm.
if (SpringContext.context != null || SpringContext.context.getBeanNamesForType(SchedulerxWorker.class).length > 1) {
LOGGER.error("If use system property config (eg. -Dschedulerx.group=xxx), can't support exists too many beans, please check your configuration.");
throw new IllegalStateException("If use system property config (eg. -Dschedulerx.group=xxx), can't support exists too many beans, please check your configuration.");
}
}
// Group ids / app keys newly added by this call
List newGroupIds = Lists.newArrayList();
List newAppKeys = Lists.newArrayList();
List groupIds = getGroupIds(conf);
List appKeys = getAppKeys(conf);
// NOTE(review): the next line is not valid Java. The remainder of appendWorkerInit and the
// start of the getGroupIds declaration appear to have been lost between "i" and
// "getGroupIds"; restore from the original source. The lines after it are the body of
// getGroupIds: it instantiates the configured GroupDiscovery and returns its group id list,
// throwing IOException("please set groupId") when that list is null or empty.
for (int i=0; i getGroupIds(Configuration conf) throws Exception {
String groupDiscoveryClassName = conf.getString(WorkerConstants.WORKER_GROUP_DISCOVERY,
DefaultGroupDiscovery.class.getName());
GroupDiscovery groupDiscovery = ReflectionUtil.getInstanceByClassName(groupDiscoveryClassName,
SchedulerxWorker.CUSTOMER_CLASS_LOADER);
List groupIdList = groupDiscovery.getGroupIdList(conf);
if (groupIdList == null || groupIdList.isEmpty()) {
throw new IOException("please set groupId");
}
return groupIdList;
}
/**
 * Resolves the app keys through the configured {@code GroupDiscovery} implementation
 * (DefaultGroupDiscovery when WORKER_GROUP_DISCOVERY is not set).
 *
 * @param conf worker configuration
 * @return the non-empty app key list
 * @throws IOException if the discovery class cannot be instantiated, or it yields no app keys
 */
private List getAppKeys(Configuration conf) throws Exception {
    String groupDiscoveryClassName = conf.getString(WorkerConstants.WORKER_GROUP_DISCOVERY,
        DefaultGroupDiscovery.class.getName());
    GroupDiscovery groupDiscovery = ReflectionUtil.getInstanceByClassName(groupDiscoveryClassName,
        SchedulerxWorker.CUSTOMER_CLASS_LOADER);
    if (groupDiscovery == null) {
        // Other call sites in this class null-check reflectively created instances;
        // fail with a descriptive error instead of a bare NullPointerException.
        throw new IOException("groupDiscovery is null, className=" + groupDiscoveryClassName);
    }
    // NOTE(review): raw List kept as declared; the element type argument appears to have been lost.
    List appKeyList = groupDiscovery.getAppKeyList(conf);
    if (appKeyList == null || appKeyList.isEmpty()) {
        throw new IOException("please set appKey");
    }
    return appKeyList;
}
/**
 * Resolves the console domain name and stores it in the worker configuration under
 * WORKER_DOMAIN_NAME. Sources are tried in order until one yields a non-blank value:
 * the -D system property, the worker configuration, the address (http) server, diamond.
 *
 * @return the resolved domain; may be blank or null if no source provided one
 * @throws DomainInvalidException if the resolved value contains "http"
 */
private String initConsoleDomain() throws Exception {
    // -D parameter has the highest priority
    String resolved = System.getProperty(WorkerConstants.WORKER_DOMAIN_PROPERTY);
    if (StringUtils.isBlank(resolved)) {
        // value set by the user
        resolved = ConfigUtil.getWorkerConfig().getString(WorkerConstants.WORKER_DOMAIN_NAME);
        if (StringUtils.isBlank(resolved)) {
            // then ask the address server
            resolved = ConsoleUtil.getDomainFromHttpServer();
            if (StringUtils.isBlank(resolved)) {
                // finally diamond
                resolved = DiamondUtil.getData(WorkerConstants.WORKER_DOMAIN_DIAMOND);
            }
        }
    }
    boolean hasScheme = resolved != null && resolved.contains("http");
    if (hasScheme) {
        throw new DomainInvalidException("domainName need not http:// only domain eg: schedulerx2.tao.net");
    }
    ConfigUtil.getWorkerConfig().setProperty(WorkerConstants.WORKER_DOMAIN_NAME, resolved);
    return resolved;
}
/**
 * Resolves the STS access key, secret key and token and writes them into the worker
 * configuration.
 * <p>
 * Priority: -D system property -> value already set in the configuration -> environment
 * variable (same name with '.' replaced by '_'). Environment variables are consulted only
 * when the access key is blank both as a system property and in the configuration.
 * <p>
 * A value is written only when it is non-blank. Previously the three properties were set
 * unconditionally, so a value configured via "set" was replaced with null whenever no -D
 * value was supplied, contradicting the priority above.
 */
private static void initStsKey() {
    Configuration conf = ConfigUtil.getWorkerConfig();
    // First try the -D system properties.
    String stsAK = System.getProperty(WorkerConstants.STS_ACESSKEY);
    String stsSK = System.getProperty(WorkerConstants.STS_SECRETKEY);
    String stsToken = System.getProperty(WorkerConstants.STS_TOKEN);
    if (StringUtils.isBlank(stsAK) && StringUtils.isBlank(conf.getString(WorkerConstants.STS_ACESSKEY))) {
        // Neither -D nor configuration supplies an access key: fall back to environment
        // variables, which use '_' in place of '.' (naming used in EDAS-integrated environments).
        stsAK = System.getenv(WorkerConstants.STS_ACESSKEY.replace(".", "_"));
        stsSK = System.getenv(WorkerConstants.STS_SECRETKEY.replace(".", "_"));
        stsToken = System.getenv(WorkerConstants.STS_TOKEN.replace(".", "_"));
    }
    // Only overwrite when a higher-priority source actually produced a value.
    if (StringUtils.isNotBlank(stsAK)) {
        conf.setProperty(WorkerConstants.STS_ACESSKEY, stsAK);
    }
    if (StringUtils.isNotBlank(stsSK)) {
        conf.setProperty(WorkerConstants.STS_SECRETKEY, stsSK);
    }
    if (StringUtils.isNotBlank(stsToken)) {
        conf.setProperty(WorkerConstants.STS_TOKEN, stsToken);
    }
}
/**
 * Determines the local host address by opening a connection to the console domain and
 * reading the local end of the socket.
 * This method must be invoked after {@link #initConsoleDomain()}.
 * <p>
 * If the "hsf.server.ip" system property is set (non-blank), it is returned directly.
 * The domain may be "host", "host:port" or "host:port/path"; port 80 is used when none is
 * given. {@code IpUtil.getIPV4Address()} is used instead when the socket's local address is
 * IPv6, when it is the wildcard address (socket never connected, e.g. no domain configured),
 * or when connecting fails.
 *
 * @return the local host address
 */
private static String getLocalHost() {
    String domain = ConfigUtil.getWorkerConfig().getString(WorkerConstants.WORKER_DOMAIN_NAME);
    // Hack: an explicit "hsf.server.ip" system property (set in EDAS environments) wins.
    String localHost = System.getProperty("hsf.server.ip");
    if (StringUtils.isNotBlank(localHost)) {
        return localHost;
    }
    // Otherwise discover the address from a real outbound connection.
    try (Socket socket = new Socket()) {
        if (domain != null) {
            if (domain.contains(":")) {
                // host:port, optionally followed by /path
                String[] tokens = domain.split(":");
                String hostname = tokens[0];
                // split("/")[0] yields the port both with and without a trailing path
                int port = Integer.parseInt(tokens[1].split("/")[0]);
                socket.connect(new InetSocketAddress(hostname, port), 5000);
            } else {
                // only a domain: default to port 80
                socket.connect(new InetSocketAddress(domain, 80), 5000);
            }
        }
        InetAddress address = socket.getLocalAddress();
        if (address instanceof Inet6Address || address.isAnyLocalAddress()) {
            // An unbound socket reports the wildcard address (0.0.0.0), which is not a
            // usable worker address; fall back like the IPv6 case.
            localHost = IpUtil.getIPV4Address();
        } else {
            localHost = address.getHostAddress();
        }
    } catch (Exception e) {
        LOGGER.error("get local host error", e);
        localHost = IpUtil.getIPV4Address();
    }
    return localHost;
}
/**
 * Copies every JVM system property (-D parameter) whose key starts with "schedulerx"
 * into the given configuration.
 *
 * @param conf configuration that receives the properties
 */
private void initMetaInfoFromSystem(Configuration conf) throws Exception {
    Properties properties = System.getProperties();
    LOGGER.debug("system.properties=" + properties);
    // Properties exposes its entries as Map.Entry<Object, Object>.
    for (Entry<Object, Object> entry : properties.entrySet()) {
        String key = entry.getKey().toString();
        if (key.startsWith("schedulerx")) {
            conf.setProperty(key, entry.getValue());
        }
    }
}
/**
 * Fetches meta information from the console and merges it into the worker configuration.
 * <ul>
 * <li>Every returned property is copied into the configuration.</li>
 * <li>If the log collector type is "schedulerx", {@code LogConfig} is enabled.</li>
 * <li>If the app type is KUBERNETES or ALIYUN_ACK, cgroup metrics are switched on, the keys
 *     of schedulerx-k8s.properties are imported, and the k8s namespace read from
 *     K8S_NAMESPACE_PATH is stored when available. Failures in that import are logged and
 *     ignored.</li>
 * </ul>
 *
 * @throws NumberFormatException if the app type value is not an integer
 */
private void initMetaInfoFromConsole(String namespace, String namespaceSource, List groupIds, List appKeys)
        throws Exception {
    // Value type left as a wildcard: values are only copied or converted via toString here.
    Map<String, ?> properties = ConsoleUtil.fetchMetaInfoFromConsole(namespace, namespaceSource, groupIds, appKeys);
    if (properties == null) {
        return;
    }
    Configuration conf = ConfigUtil.getWorkerConfig();
    for (Entry<String, ?> entry : properties.entrySet()) {
        conf.setProperty(entry.getKey(), entry.getValue());
        // Constant-first equals and String.valueOf keep this safe for null keys/values.
        if (CommonConstants.LOG_COLLECTOR_TYPE.equals(entry.getKey())
            && "schedulerx".equals(String.valueOf(entry.getValue()))) {
            LogConfig.INSTANCE.setEnable(true);
        }
    }
    // Kubernetes applications additionally load the schedulerx-plugin-kubernetes configuration.
    Object appTypeValue = properties.get(CommonConstants.APP_TYPE);
    if (appTypeValue == null) {
        return;
    }
    int appType = Integer.parseInt(appTypeValue.toString());
    if (appType != AppType.KUBERNETES.getValue() && appType != AppType.ALIYUN_ACK.getValue()) {
        return;
    }
    conf.setProperty(WorkerConstants.CGROUP_MERTRICS_ENABLE, true);
    LOGGER.info("appType={}, auto set cgroup.metrics.enable=true", appType);
    try {
        Configuration k8sConf = ConfigUtil.newConfig("schedulerx-k8s.properties");
        Iterator<String> keys = k8sConf.getKeys();
        while (keys.hasNext()) {
            String key = keys.next();
            conf.setProperty(key, k8sConf.getProperty(key));
        }
        LOGGER.info("appType={}, import schedulerx-k8s.properties finished", appType);
        String k8sNamespace = FileUtils.readLine(WorkerConstants.K8S_NAMESPACE_PATH);
        if (k8sNamespace != null) {
            conf.setProperty(WorkerConstants.K8S_NAMESPACE, k8sNamespace);
        }
    } catch (Exception e) {
        // Best effort, but keep the cause; previously the exception was dropped.
        LOGGER.warn("load schedulerx-k8s.properties failed", e);
    }
}
/**
 * Refreshes the log collector and sends one greeting line through it for the given group.
 *
 * @param appGroupId application group id passed to the collector
 * @param workerAddr worker address embedded in the greeting text
 * @param groupId    group id passed to the collector
 */
private void initLogCollector(long appGroupId, String workerAddr, String groupId) {
    LogCollector collector = LogCollectorFactory.refresh();
    String greeting = "hello schedulerx, workerAddr:" + workerAddr;
    collector.collect(appGroupId, "schedulerx", greeting, groupId);
}
/**
 * Resolves the namespace. Sources in priority order: the worker configuration, the
 * WORKER_NAMESPACE system property, then the TENANT_ID system property (naming used in
 * EDAS-integrated environments).
 *
 * @return the first non-blank value found, otherwise the (blank or null) last lookup result
 */
private String initNamespace() {
    // an explicitly set value wins
    String ns = ConfigUtil.getWorkerConfig().getString(WorkerConstants.WORKER_NAMESPACE);
    if (!StringUtils.isBlank(ns)) {
        return ns;
    }
    // -D schedulerx.namespace
    ns = System.getProperty(WorkerConstants.WORKER_NAMESPACE);
    if (!StringUtils.isBlank(ns)) {
        return ns;
    }
    // -D tenant.id
    return System.getProperty(WorkerConstants.TENANT_ID);
}
/**
 * Resolves the namespace source. Sources in priority order: the worker configuration, the
 * -D system property, then the environment variable with '.' replaced by '_'.
 *
 * @return the first non-blank value found, otherwise the (blank or null) last lookup result
 */
private String initNamespaceSource() {
    // an explicitly set value wins
    String source = ConfigUtil.getWorkerConfig().getString(WorkerConstants.WORKER_NAMESPACE_SOURCE);
    if (!StringUtils.isBlank(source)) {
        return source;
    }
    // -D parameter
    source = System.getProperty(WorkerConstants.WORKER_NAMESPACE_SOURCE);
    if (!StringUtils.isBlank(source)) {
        return source;
    }
    // environment variable
    String envName = WorkerConstants.WORKER_NAMESPACE_SOURCE.replace(".", "_");
    return System.getenv(envName);
}
/**
 * Resolves the current label of this worker and, when one is found, stores it under
 * WORKER_CUR_LABEL (trimmed, with spaces replaced by underscores).
 * Sources in priority order: WORKER_LABEL in the configuration, the EDAS_DEPLOY_VERSION
 * system property, the WORKER_LABEL system property, the EDAS_PACKAGE_VERSION environment
 * variable.
 */
private void initCurLabel() {
    // an explicitly set value wins
    String curLabel = ConfigUtil.getWorkerConfig().getString(WorkerConstants.WORKER_LABEL);
    if (StringUtils.isBlank(curLabel)) {
        // -D: workers in an EDAS gray release take the label from EDAS_DEPLOY_VERSION
        String deployVersion = System.getProperty(WorkerConstants.EDAS_DEPLOY_VERSION);
        // workers outside a gray group take it from WORKER_LABEL
        curLabel = StringUtils.isBlank(deployVersion)
            ? System.getProperty(WorkerConstants.WORKER_LABEL)
            : deployVersion;
    }
    if (StringUtils.isBlank(curLabel)) {
        // environment variable
        curLabel = System.getenv(WorkerConstants.EDAS_PACKAGE_VERSION);
    }
    if (StringUtils.isBlank(curLabel)) {
        return;
    }
    String normalized = curLabel.trim().replace(" ", "_");
    ConfigUtil.getWorkerConfig().setProperty(WorkerConstants.WORKER_CUR_LABEL, normalized);
}
/**
 * Validates startup parameters: when SCHEDULERX_NAMESPACE_ENABLE is true (default false),
 * a non-blank namespace is required.
 *
 * @throws NamespaceNotFoundException if namespaces are enabled and none was resolved
 */
private void checkParameters(Configuration conf, String namespace) throws Exception {
    boolean namespaceRequired = conf.getBoolean(CommonConstants.SCHEDULERX_NAMESPACE_ENABLE, false);
    if (namespaceRequired && StringUtils.isBlank(namespace)) {
        throw new NamespaceNotFoundException("Not found namespace.");
    }
}
/**
 * Authenticates all groups with the configured {@code Authenticator}
 * (DefaultAuthenticator unless SCHEDULERX_AUTHENTICATE names another class).
 * The literal class name "null" (any case) disables authentication.
 *
 * @throws IOException if the authenticator cannot be instantiated
 */
private void authenticate(Configuration conf, String namespace, String namespaceSource, List groupIds, List appKeys)
        throws Exception {
    String className = conf.getString(CommonConstants.SCHEDULERX_AUTHENTICATE,
        DefaultAuthenticator.class.getName());
    if (className.equalsIgnoreCase("null")) {
        LOGGER.warn("server don't support authentication");
        return;
    }
    Authenticator auth = ReflectionUtil.getInstanceByClassName(className,
        SchedulerxWorker.CUSTOMER_CLASS_LOADER);
    if (auth == null) {
        throw new IOException("authenticator is null");
    }
    auth.authenticate(conf, namespace, namespaceSource, groupIds, appKeys);
    LOGGER.info("authenticate success.");
}
/**
 * Initializes the H2 file persistence by creating its table(s).
 */
private static void initStore() throws Exception {
    H2FilePersistence.getInstance().initTable();
}
/**
 * Decides whether this machine must be isolated, i.e. must not start the worker.
 * <p>
 * A whitelist (ENABLE_UNITS / ENABLE_SITES) takes priority over a blacklist
 * (DISABLE_UNITS / DISABLE_SITES); with neither configured the machine is never isolated.
 * Unit and site come from the SIGMA_APP_UNIT / SIGMA_APP_SITE environment variables; if
 * either is blank, both are looked up from the Armory service by IP.
 * <p>
 * Short forms are accepted in the lists, for example:
 * unit=CENTER_UNIT.center -> simpleUnit=center;
 * site=na610 -> simpleSite=center.na610.
 *
 * @param host IP of this machine, used for the Armory lookup
 * @return true if the machine is not in the whitelist, or is in the blacklist
 * @throws IOException if the Armory lookup does not succeed
 */
private static boolean isolateMachine(String host) throws Exception {
    Configuration conf = ConfigUtil.getWorkerConfig();
    List<String> enableUnits = Lists.newArrayList(conf.getStringArray(WorkerConstants.ENABLE_UNITS));
    List<String> enableSites = Lists.newArrayList(conf.getStringArray(WorkerConstants.ENABLE_SITES));
    List<String> disableUnits = Lists.newArrayList(conf.getStringArray(WorkerConstants.DISABLE_UNITS));
    List<String> disableSites = Lists.newArrayList(conf.getStringArray(WorkerConstants.DISABLE_SITES));

    // The whitelist has priority over the blacklist.
    boolean whitelistMode = !enableUnits.isEmpty() || !enableSites.isEmpty();
    boolean blacklistMode = !whitelistMode && (!disableUnits.isEmpty() || !disableSites.isEmpty());
    if (!whitelistMode && !blacklistMode) {
        return false;
    }

    String unit = System.getenv(CommonConstants.SIGMA_APP_UNIT);
    String site = System.getenv(CommonConstants.SIGMA_APP_SITE);
    if (StringUtils.isBlank(unit) || StringUtils.isBlank(site)) {
        String[] unitAndSite = queryUnitAndSiteFromArmory(host);
        unit = unitAndSite[0];
        site = unitAndSite[1];
    }

    String simpleUnit = unit.substring(unit.indexOf(".") + 1);
    String simpleSite = simpleUnit + "." + site;
    List<String> units = whitelistMode ? enableUnits : disableUnits;
    List<String> sites = whitelistMode ? enableSites : disableSites;
    boolean listed = units.contains(simpleUnit) || sites.contains(simpleSite)
        || units.contains(unit) || sites.contains(site);

    // Whitelist: isolate when NOT listed. Blacklist: isolate when listed.
    boolean isolated = whitelistMode ? !listed : listed;
    if (isolated) {
        LOGGER.warn("init isolated. ip=" + host + ", unit=" + unit + ", site=" + site);
    }
    return isolated;
}

/**
 * Looks up the unit and site of the given IP from the Armory service.
 * Shared by the whitelist and blacklist paths of isolateMachine (the request URL is now
 * logged for both).
 *
 * @return a two-element array: {unit, site}
 * @throws IOException if the service reports failure or returns no data
 */
private static String[] queryUnitAndSiteFromArmory(String host) throws Exception {
    String url = "http://api.sh.gns.alibaba-inc.com/gns/armory/query?ip=" + host;
    LOGGER.info("get machine info, url=" + url);
    HttpResponse<JsonNode> response = Unirest.get(url).asJson();
    ArmoryResult result = JsonUtil.fromJson(response.getBody().getObject().toString(),
        ArmoryResult.class);
    if (result.isSuccess() && result.getData() != null) {
        return new String[] {result.getData().getUnit(), result.getData().getSite()};
    }
    LOGGER.warn("get armory result failed, result=" + result);
    throw new IOException("get armory result failed");
}
/**
 * Instantiates the timer tasks listed under WORKER_TIMER_TASKS and schedules each one at a
 * fixed rate on its own single-thread scheduler (thread name
 * "Worker-timer-Thread-" + task name). Initial delay and period are in seconds.
 * <p>
 * Each task is initialized before it is scheduled, so its first run cannot overlap
 * {@code init()}, and a task whose {@code init()} throws is never left scheduled.
 *
 * @param conf worker configuration
 */
private static void initTimerTask(Configuration conf) throws Exception {
    List<AbstractTimerTask> timerTasks = ReflectionUtil.getInstancesByConf(conf,
        WorkerConstants.WORKER_TIMER_TASKS);
    if (timerTasks == null || timerTasks.isEmpty()) {
        return;
    }
    for (final AbstractTimerTask timerTask : timerTasks) {
        timerTask.init();
        ScheduledExecutorService ses = Executors.newScheduledThreadPool(1, new ThreadFactory() {
            @Override
            public Thread newThread(Runnable runnable) {
                return new Thread(runnable, "Worker-timer-Thread-" + timerTask.getName());
            }
        });
        ses.scheduleAtFixedRate(timerTask, timerTask.getInitialDelay(), timerTask.getPeriod(),
            TimeUnit.SECONDS);
    }
}
/**
 * Starts server discovery for every group id the group manager does not know yet and
 * registers that group id with the manager.
 *
 * @param groupIdList group ids of this worker
 */
private static void initServerDiscovery(List<String> groupIdList) throws Exception {
    // Typed as List<String>: the loop below reads the elements as String.
    for (String groupId : groupIdList) {
        if (groupManager.contains(groupId)) {
            continue;
        }
        groupManager.startServerDiscovery(groupId);
        groupManager.appendGroupId(groupId, groupId);
    }
}
/**
 * Creates the worker's routing actors on the given actor system: heartbeat, job instance, log,
 * container, task and at-least-once-delivery. Pool sizes are read from the worker configuration
 * and every router is bound to its own named dispatcher.
 *
 * @param actorSystem actor system the routers are created on
 * @param workerId    not used by this method
 * @throws Exception if reading the configuration or creating an actor fails
 */
private static void initActors(ActorSystem actorSystem, final String workerId) throws Exception {
    // heartbeat router
    int heartbeatPoolSize = ConfigUtil.getWorkerConfig().getInt(
        WorkerConstants.WORKER_HEARTBEAT_ACTOR_NUM, WorkerConstants.WORKER_HEARTBEAT_ACTOR_NUM_DEFAULT);
    Props heartbeatProps = Props.create(WorkerHeartbeatActor.class)
        .withRouter(new RoundRobinPool(heartbeatPoolSize))
        .withDispatcher("akka.actor.thread-dispatcher-heartbeat");
    actorSystem.actorOf(heartbeatProps, WorkerConstants.WORKER_AKKA_HEARTBEAT_ROUTING);

    // job instance router
    int instancePoolSize = ConfigUtil.getWorkerConfig().getInt(
        WorkerConstants.WORKER_JOBINSTANCE_ACTOR_NUM, WorkerConstants.WORKER_JOBINSTANCE_ACTOR_NUM_DEFAULT);
    Props instanceProps = Props.create(JobInstanceActor.class)
        .withRouter(new RoundRobinPool(instancePoolSize))
        .withDispatcher("akka.actor.thread-dispatcher-instance");
    actorSystem.actorOf(instanceProps, WorkerConstants.WORKER_AKKA_JOB_INSTANCE_ROUTING);

    // log router
    int logPoolSize = ConfigUtil.getWorkerConfig().getInt(
        WorkerConstants.WORKER_LOG_ACTOR_NUM, WorkerConstants.WORKER_LOG_ACTOR_NUM_DEFAULT);
    Props logProps = Props.create(LogActor.class)
        .withRouter(new RoundRobinPool(logPoolSize))
        .withDispatcher("akka.actor.thread-dispatcher-log");
    actorSystem.actorOf(logProps, WorkerConstants.WORKER_AKKA_LOG_ROUTING);

    // container router
    int containerPoolSize = ConfigUtil.getWorkerConfig().getInt(
        WorkerConstants.WORKER_CONTAINER_ACTOR_NUM, WorkerConstants.WORKER_CONTAINER_ACTOR_NUM_DEFAULT);
    Props containerProps = Props.create(ContainerActor.class)
        .withRouter(new RoundRobinPool(containerPoolSize))
        .withDispatcher("akka.actor.thread-dispatcher-container");
    actorSystem.actorOf(containerProps, WorkerConstants.WORKER_AKKA_CONTAINER_ROUTING);

    // task router
    int taskPoolSize = ConfigUtil.getWorkerConfig().getInt(
        WorkerConstants.WORKER_TASK_ACTOR_NUM, WorkerConstants.WORKER_TASK_ACTOR_NUM_DEFAULT);
    Props taskProps = TaskRouter.props(taskPoolSize)
        .withDispatcher("akka.actor.thread-dispatcher-task");
    actorSystem.actorOf(taskProps, WorkerConstants.WORKER_AKKA_TASK_ROUTING);

    // at-least-once-delivery router; its reference is kept in a static field
    int deliveryPoolSize = ConfigUtil.getWorkerConfig().getInt(
        WorkerConstants.AT_LEAST_ONCE_DELIVERY_ACTOR_NUM,
        WorkerConstants.AT_LEAST_ONCE_DELIVERY_ACTOR_DEFAULT);
    Props deliveryProps = AtLeastOnceDeliveryRoutingActor.props(deliveryPoolSize)
        .withDispatcher("akka.actor.thread-dispatcher-delivery");
    AtLeastDeliveryRoutingActor = actorSystem.actorOf(deliveryProps,
        WorkerConstants.AT_LEAST_ONCE_DELIVERY_ROUTING_NAME);

    // Disabled: a GuarantineRecoverActor (WorkerConstants.WORKER_AKKA_GUARANTINE_RECOVER_NAME)
    // used to be created here and subscribed on the event stream to AssociationErrorEvent and
    // ThisActorSystemQuarantinedEvent.
}
/**
 * Sends one heartbeat request per registered group to that group's active heartbeat actor.
 *
 * <p>For every entry of {@code GroupManager.INSTANCE.getGroupId2AppGroupIdMap()} a
 * {@code WorkerHeartBeatRequest} is built (version, group, worker id, running instance ids,
 * metrics, starter, source, label, online flag, optional app key) and sent through
 * {@code FutureUtils.awaitResult(heartbeatActor, request, 5)}. Groups without an active
 * heartbeat actor are skipped with a warning.
 *
 * <p>On a timeout, and only when {@code online} is true and
 * {@code WorkerConstants.AKKA_REMOTING_AUTO_RECOVER} is enabled, the active server is probed
 * with a plain TCP connect to decide whether the actor system should be restarted.
 *
 * <p>Nothing is propagated to the caller: per-group failures are logged and the loop continues,
 * and any other {@code Throwable} is logged by the outer catch.
 *
 * @param workerId id of this worker, copied into every request
 * @param online   online flag copied into every request; it is unboxed, so it must not be null
 */
private static void sendHeartBeat(final String workerId, Boolean online){
    TaskMasterPool masterPool = TaskMasterPool.INSTANCE;
    Configuration conf = ConfigUtil.getWorkerConfig();
    String version = conf.getString(WorkerConstants.WORKER_VERSION);
    String starter = conf.getString(WorkerConstants.WORKER_STARTER_MODE,
        WorkerConstants.WORKER_STARTER_MODE_DEFAULT);
    String label = conf.getString(WorkerConstants.WORKER_CUR_LABEL, "");
    String source = conf.getString(WorkerConstants.WORKER_SOURCE, WorkerConstants.WORKER_SOURCE_UNKNOWN);
    boolean enableHeartBeatLog = conf.getBoolean(WorkerConstants.HEARTBEAT_LOG_ENABLE, true);
    // Key/value types are required by the typed getKey()/getValue() reads below.
    Map<String, Long> groupIdMap = GroupManager.INSTANCE.getGroupId2AppGroupIdMap();
    try {
        for (Entry<String, Long> groupEntry : groupIdMap.entrySet()) {
            String groupId = groupEntry.getKey();
            long appGroupId = groupEntry.getValue();
            String appKey = GroupManager.INSTANCE.getAppKeyByGroupId(groupId);
            ServerDiscovery serverDiscovery = ServerDiscoveryFactory.getDiscovery(groupId);
            // Read the actor once so the null check and the use see the same reference.
            ActorSelection heartbeatActor =
                serverDiscovery == null ? null : serverDiscovery.getActiveHeartBeatActor();
            if (heartbeatActor == null) {
                heatbeatLogger.warn("heartbeatActor is null, can be ignored if not frequently occurs");
                continue;
            }
            Metrics metrics = MetricsCollector.getMetrics();
            if (metrics != null) {
                metrics.setExecCount(ThreadContainerPool.getInstance().getCount(groupId));
            }
            WorkerHeartBeatRequest.Builder builder = WorkerHeartBeatRequest.newBuilder()
                .setVersion(version)
                .setGroupId(groupId)
                .setWorkerId(workerId)
                .addAllJobInstanceId(masterPool.getInstanceIds(appGroupId))
                .setMetricsJson(metrics != null ? JsonUtil.toJson(metrics) : "")
                .setStarter(starter)
                .setAppGroupId(appGroupId)
                .setSource(source)
                .setLabel(label)
                .setOnline(online);
            if (StringUtils.isNotBlank(appKey)) {
                builder.setAppKey(appKey);
            }
            WorkerHeartBeatRequest request = builder.build();
            try {
                long start = System.currentTimeMillis();
                WorkerHeartBeatResponse response =
                    (WorkerHeartBeatResponse) FutureUtils.awaitResult(heartbeatActor, request, 5);
                long end = System.currentTimeMillis();
                if (!response.getSuccess()) {
                    heatbeatLogger.error("heartbeat groupId={} appKey={} to {} error, cost={}ms, errMsg={}", groupId, appKey,
                        heartbeatActor.anchorPath().address(), (end - start), response.getMessage());
                } else if (enableHeartBeatLog) {
                    // One placeholder per argument: previously "online" had no placeholder, so
                    // every value was shifted by one and the cost was never printed.
                    heatbeatLogger.info("heartbeat online={} groupId={} to {}, cost={}ms", online, groupId,
                        heartbeatActor.anchorPath().address(), (end - start));
                }
                // Any response, successful or not, counts as the server being alive.
                HealthTimeHolder.INSTANCE.resetServerHeartbeatTime();
                HEART_BEAT_TIMEOUT_TIMES = 0;
            } catch (TimeoutException e) {
                heatbeatLogger.warn("heart beat groupId={} to {} timeout", groupId, heartbeatActor.anchorPath().address());
                if (online && conf.getBoolean(WorkerConstants.AKKA_REMOTING_AUTO_RECOVER,
                        WorkerConstants.AKKA_REMOTING_AUTO_RECOVER_DEFAULT)) {
                    recoverIfAkkaRemotingIsolated(serverDiscovery.getActiveServerAddr());
                }
            } catch (Exception ex) {
                heatbeatLogger.warn("active server={} lost.", serverDiscovery.getActiveServerAddr(), ex);
            }
        }
    } catch (Throwable t) {
        heatbeatLogger.warn("heartbeat error", t);
    }
}

/**
 * Probes {@code serverAddr} ("host:port") with a TCP connect after a heartbeat timeout.
 *
 * <p>If the heartbeat timed out but the socket is reachable, akka-remoting is considered
 * isolated: the timeout counter is incremented and, once it has reached 3, the actor system is
 * restarted and the counter reset. If the socket is not reachable either, it is treated as a
 * network problem and only the counter is reset.
 *
 * <p>This method never throws, so one bad address cannot stop the heartbeat of other groups.
 *
 * @param serverAddr active server address; empty or null is ignored
 */
private static void recoverIfAkkaRemotingIsolated(String serverAddr) {
    if (StringUtils.isEmpty(serverAddr)) {
        return;
    }
    String[] tokens = serverAddr.split(":");
    if (tokens.length != 2) {
        heatbeatLogger.error("wrong serverAddr=" + serverAddr);
        return;
    }
    String host = tokens[0];
    int port;
    try {
        port = Integer.parseInt(tokens[1]);
    } catch (NumberFormatException badPort) {
        heatbeatLogger.error("wrong serverAddr=" + serverAddr);
        return;
    }

    boolean reachable = false;
    Socket socket = new Socket();
    try {
        socket.connect(new InetSocketAddress(host, port), 5000);
        reachable = true;
    } catch (Exception e2) {
        // Not reachable on TCP level either: network problem, nothing to recover here.
        heatbeatLogger.warn("socket to {}:{} is not reachable", host, port, e2);
        HEART_BEAT_TIMEOUT_TIMES = 0;
    } finally {
        try {
            socket.close();
        } catch (IOException closeError) {
            heatbeatLogger.warn("close probe socket to {}:{} failed", host, port, closeError);
        }
    }
    if (!reachable) {
        return;
    }

    heatbeatLogger.info("socket to {}:{} is reachable, times={}", host, port, HEART_BEAT_TIMEOUT_TIMES);
    if (HEART_BEAT_TIMEOUT_TIMES < 3) {
        HEART_BEAT_TIMEOUT_TIMES++;
        return;
    }
    try {
        restartActorSystem();
    } catch (Exception restartError) {
        heatbeatLogger.error("restart actorSystem failed", restartError);
    }
    HEART_BEAT_TIMEOUT_TIMES = 0;
}
/**
 * Registers the periodic heartbeat on {@code heartbeatSes} unless that executor has already
 * been given a task.
 *
 * <p>The heartbeat runs immediately and then every
 * {@code CommonConstants.WORKER_HEARTBEAT_INTERVAL} seconds, passing the current value of
 * {@code SchedulerxWorker.INITED} as the online flag on each run.
 *
 * @param workerId worker id passed to every heartbeat
 */
private static void initHeartBeat(final String workerId) {
    final int periodSeconds = ConfigUtil.getWorkerConfig().getInt(
        CommonConstants.WORKER_HEARTBEAT_INTERVAL, CommonConstants.WORKER_HEARTBEAT_INTERVAL_DEFAULT);
    long scheduledSoFar = ((ScheduledThreadPoolExecutor) heartbeatSes).getTaskCount();
    if (scheduledSoFar != 0) {
        // A heartbeat task was registered before; do not register a second one.
        return;
    }
    Runnable heartbeat = new Runnable() {
        @Override
        public void run() {
            sendHeartBeat(workerId, SchedulerxWorker.INITED);
        }
    };
    heartbeatSes.scheduleAtFixedRate(heartbeat, 0, periodSeconds, TimeUnit.SECONDS);
}
/**
 * Returns the worker configuration held by {@code ConfigUtil}.
 *
 * @return the object returned by {@code ConfigUtil.getWorkerConfig()}
 */
public Configuration getConfig() {
    Configuration workerConfig = ConfigUtil.getWorkerConfig();
    return workerConfig;
}
/**
 * Command-line entry point. This method is used by Schedulerx2Agent: treat it as production
 * code and do not add test code to it.
 *
 * <p>When exactly one argument is given it is taken as the agent configuration file path and
 * loaded before the worker is initialised. After a successful start the marker file
 * {@code /tmp/<user>/schedulerx/AgentStarted} is created. Any exception is logged and the JVM
 * exits with status 1.
 *
 * @param args optional single element: path of the agent configuration file
 */
public static void main(String[] args) throws Exception {
    try {
        SchedulerxWorker worker = new SchedulerxWorker();
        boolean agentMode = args != null && args.length == 1;
        if (agentMode) {
            // args[0] is the agent conf file path
            initAgentConf(args[0]);
        }
        worker.init();

        // Create the "started successfully" marker file.
        String owner = System.getProperties().getProperty("user.name");
        if (StringUtils.isBlank(owner)) {
            owner = "admin";
        }
        File marker = new File("/tmp/" + owner + "/schedulerx/AgentStarted");
        File markerDir = marker.getParentFile();
        markerDir.mkdirs();
        marker.createNewFile();
    } catch (Exception e) {
        LOGGER.error("Schedulerx worker start error", e);
        System.exit(1);
    }
}
/**
 * Stores the given Spring application context in {@code SpringContext.context} and, when no
 * starter mode is configured yet, marks the worker as started by Spring.
 *
 * @param applicationContext context to publish through {@code SpringContext}
 * @throws BeansException declared by the implemented interface; not thrown by this body
 */
@Override
public void setApplicationContext(ApplicationContext applicationContext) throws BeansException {
    // Arguments follow the placeholder labels: the new context first, then the value currently
    // held by SpringContext (they were previously passed the other way round).
    LOGGER.info("set applicationContext={} to SpringContext={}", applicationContext, SpringContext.context);
    SpringContext.context = applicationContext;
    //SpringContext.unlock();
    if (ConfigUtil.getWorkerConfig().getProperty(WorkerConstants.WORKER_STARTER_MODE) == null) {
        ConfigUtil.getWorkerConfig().setProperty(WorkerConstants.WORKER_STARTER_MODE,
            WorkerConstants.WORKER_STARTER_SPRING);
    }
}
/**
 * Bean initialisation callback: logs a message and delegates to {@code init()}.
 *
 * @throws Exception whatever {@code init()} throws
 */
@Override
public void afterPropertiesSet() throws Exception {
    LOGGER.info("initializing bean...");
    this.init();
}
/**
 * Logs, for one representative class per dependency, the value of
 * {@code Class.getResource("")}, i.e. the location that class's package was resolved from.
 * Useful for checking which jar a dependency was actually loaded from.
 *
 * <p>The lines are logged one by one in a fixed order; if a class cannot be loaded, the lines
 * before it have already been written.
 */
private void printMvnDenpendency() {
LOGGER.info("===maven dependencies===");
LOGGER.info("netty:" + Boss.class.getResource(""));
LOGGER.info("protobuf-java:" + ProtocolMessageEnum.class.getResource(""));
LOGGER.info("javaassist:" + JvstCodeGen.class.getResource(""));
LOGGER.info("commons-configuration:" + Configuration.class.getResource(""));
LOGGER.info("config:" + Config.class.getResource(""));
// NOTE(review): the label says "gson" but the file imports JSONException from org.json,
// so this line reports the org.json location - confirm which dependency is meant.
LOGGER.info("gson:" + JSONException.class.getResource(""));
// NOTE(review): which Function class this refers to depends on an import outside this view.
LOGGER.info("scala:" + Function.class.getResource(""));
LOGGER.info("===================");
}
/**
 * Sets the worker domain name with low priority: the value is stored only when no non-blank
 * {@code WorkerConstants.WORKER_DOMAIN_NAME} is configured yet.
 *
 * @param domainName domain name to use as a fallback
 */
public void setDomainName(String domainName) {
    final String key = WorkerConstants.WORKER_DOMAIN_NAME;
    boolean alreadyConfigured = !StringUtils.isBlank(ConfigUtil.getWorkerConfig().getString(key));
    if (alreadyConfigured) {
        return;
    }
    ConfigUtil.getWorkerConfig().setProperty(key, domainName);
}
/**
 * Sets or extends the configured group ids, synchronised on {@code SchedulerxWorker.class}.
 *
 * <p>Before the worker is initialised the value simply replaces
 * {@code WorkerConstants.GROUP_ID}. Once {@code INITED} is true, the comma-separated ids in
 * {@code groupId} are appended to the ids already configured and the joined list is stored.
 *
 * @param groupId one group id or several separated by commas
 */
public void setGroupId(String groupId) {
    synchronized (SchedulerxWorker.class) {
        Configuration conf = ConfigUtil.getWorkerConfig();
        if (!INITED) {
            conf.setProperty(WorkerConstants.GROUP_ID, groupId);
            return;
        }
        // Already initialised: keep the existing ids and append the new ones.
        List<String> merged = Lists.newArrayList(conf.getStringArray(WorkerConstants.GROUP_ID));
        for (String added : groupId.split(",")) {
            merged.add(added);
        }
        conf.setProperty(WorkerConstants.GROUP_ID, StringUtils.join(merged, ","));
    }
}
/**
 * Sets or extends the configured app keys, synchronised on {@code SchedulerxWorker.class}.
 *
 * <p>Before the worker is initialised the value simply replaces
 * {@code WorkerConstants.APP_KEY}. Once {@code INITED} is true, the comma-separated keys in
 * {@code appKey} are appended to the keys already configured and the joined list is stored.
 *
 * @param appKey one app key or several separated by commas
 */
public void setAppKey(String appKey) {
    synchronized (SchedulerxWorker.class) {
        Configuration conf = ConfigUtil.getWorkerConfig();
        if (!INITED) {
            conf.setProperty(WorkerConstants.APP_KEY, appKey);
            return;
        }
        // Already initialised: keep the existing keys and append the new ones.
        List<String> merged = Lists.newArrayList(conf.getStringArray(WorkerConstants.APP_KEY));
        for (String added : appKey.split(",")) {
            merged.add(added);
        }
        conf.setProperty(WorkerConstants.APP_KEY, StringUtils.join(merged, ","));
    }
}
/**
 * Stores the flag under {@code WorkerConstants.BATCH_WORK_ENABLE} in the worker configuration.
 *
 * @param enableBatchWork value to store
 */
public void setEnableBatchWork(boolean enableBatchWork) {
    final String key = WorkerConstants.BATCH_WORK_ENABLE;
    ConfigUtil.getWorkerConfig().setProperty(key, enableBatchWork);
}
/**
 * Loads the agent configuration file and copies its settings into the worker configuration.
 *
 * <p>If the file cannot be loaded ({@code ConfigUtil.newConfig} returns null) an error is
 * logged and the worker configuration is left untouched. Otherwise agent mode forces batch work
 * off, a shared container pool and the {@code "WAIT_ALL"} graceful shutdown mode; the remaining
 * values are taken from the agent file, some with defaults and some only when present. The
 * starter mode comes from the {@code WorkerConstants.SCHEDULERX_STARTER_MODE} system property
 * and falls back to the agent starter.
 *
 * @param agentConfPath path of the agent configuration file
 */
private static void initAgentConf(String agentConfPath) {
    Configuration agentConf = ConfigUtil.newConfig(agentConfPath);
    if (agentConf == null) {
        LOGGER.error("load agent conf error, agentConf path:{}", agentConfPath);
        return;
    }
    Configuration workerConf = ConfigUtil.getWorkerConfig();

    // Values fixed for agent mode.
    workerConf.setProperty(WorkerConstants.BATCH_WORK_ENABLE, false);
    workerConf.setProperty(WorkerConstants.SHARE_CONTAINER_POOL, true);

    // Values copied as-is from the agent file (a missing key is copied as null).
    workerConf.setProperty(WorkerConstants.WORKER_DOMAIN_NAME, agentConf.getProperty(WorkerConstants.WORKER_DOMAIN_NAME));
    workerConf.setProperty(WorkerConstants.GROUP_ID, agentConf.getProperty(WorkerConstants.GROUP_ID));
    workerConf.setProperty(WorkerConstants.APP_KEY, agentConf.getProperty(WorkerConstants.APP_KEY));
    workerConf.setProperty(WorkerConstants.WORKER_NAMESPACE, agentConf.getProperty(WorkerConstants.AGENT_NAMESPACE));
    workerConf.setProperty(WorkerConstants.ALIYUN_ACESSKEY, agentConf.getProperty(WorkerConstants.AGENT_ALIYUN_ACCESS_KEY));
    workerConf.setProperty(WorkerConstants.ALIYUN_SECRETKEY, agentConf.getProperty(WorkerConstants.AGENT_ALIYUN_SECRET_KEY));
    workerConf.setProperty(WorkerConstants.ADDRESS_SERVER_DOMAIN, agentConf.getProperty(WorkerConstants.AGENT_ENDPOINT));
    workerConf.setProperty(WorkerConstants.ADDRESS_SERVER_PORT, agentConf.getProperty(WorkerConstants.AGENT_ENDPOINT_PORT));
    workerConf.setProperty(WorkerConstants.WORKER_LABEL, agentConf.getProperty(WorkerConstants.AGENT_LABEL));

    // Values with agent-mode defaults.
    workerConf.setProperty(WorkerConstants.BROADCAST_DISPATCH_THREAD_ENABLE, agentConf.getBoolean(WorkerConstants.BROADCAST_DISPATCH_THREAD_ENABLE, false));
    workerConf.setProperty(WorkerConstants.BROADCAST_DISPATCH_THREAD_NUM, agentConf.getInt(WorkerConstants.BROADCAST_DISPATCH_THREAD_NUM, 4));
    workerConf.setProperty(WorkerConstants.GRACE_SHUTDOWN_MODE, "WAIT_ALL");
    workerConf.setProperty(WorkerConstants.GRACE_SHUTDOWN_TIMEOUT, agentConf.getLong(WorkerConstants.GRACE_SHUTDOWN_TIMEOUT, 15));
    workerConf.setProperty(WorkerConstants.HTTP_CLIENT_MAX_TOTAL_CONNECTIONS, agentConf.getInt(WorkerConstants.HTTP_CLIENT_MAX_TOTAL_CONNECTIONS, 20));

    // Values copied only when the agent file defines them.
    Boolean enableHttpServer = agentConf.getBoolean(WorkerConstants.HTTP_SERVER_ENABLE, null);
    if (enableHttpServer != null) {
        workerConf.setProperty(WorkerConstants.HTTP_SERVER_ENABLE, enableHttpServer);
    }
    Integer httpServerPort = agentConf.getInteger(WorkerConstants.HTTP_SERVER_PORT, null);
    if (httpServerPort != null) {
        workerConf.setProperty(WorkerConstants.HTTP_SERVER_PORT, httpServerPort);
    }
    Object mapMasterStatusCheckInterval = agentConf.getProperty(WorkerConstants.Map_MASTER_STATUS_CHECK_INTERVAL);
    if (mapMasterStatusCheckInterval != null) {
        workerConf.setProperty(WorkerConstants.Map_MASTER_STATUS_CHECK_INTERVAL, mapMasterStatusCheckInterval);
    }
    String h2User = agentConf.getString(WorkerConstants.H2_DATABASE_USER_KEY);
    if (StringUtils.isNotEmpty(h2User)) {
        workerConf.setProperty(WorkerConstants.H2_DATABASE_USER_KEY, h2User);
    }
    String h2Password = agentConf.getString(WorkerConstants.H2_DATABASE_PASSWORD_KEY);
    // Guard on the password itself. The previous check tested h2User, which dropped a
    // configured password when no user was set and stored null when only a user was set.
    if (StringUtils.isNotEmpty(h2Password)) {
        workerConf.setProperty(WorkerConstants.H2_DATABASE_PASSWORD_KEY, h2Password);
    }

    // Starter mode: the system property wins, otherwise this is an agent start.
    String starterMode = System.getProperty(WorkerConstants.SCHEDULERX_STARTER_MODE);
    if (StringUtils.isNotEmpty(starterMode)) {
        workerConf.setProperty(WorkerConstants.WORKER_STARTER_MODE, starterMode);
        if (starterMode.equalsIgnoreCase(WorkerConstants.WORKER_STARTER_POD)) {
            workerConf.setProperty(WorkerConstants.CGROUP_MERTRICS_ENABLE, true);
            LOGGER.info("starterMode={}, auto set cgroup.metrics.enable=true", starterMode);
        }
    } else {
        workerConf.setProperty(WorkerConstants.WORKER_STARTER_MODE, WorkerConstants.WORKER_STARTER_AGENT);
    }
}
/**
 * Terminates the current actor system and, once termination has completed, creates a new one
 * with the same settings, recreates the actors and hands the new system to
 * {@code groupManager}. Does nothing when there is no actor system.
 *
 * <p>The method returns right after registering the completion callback; the restart itself
 * happens asynchronously inside that callback.
 *
 * @throws Exception declared for callers; the visible body itself only registers a callback
 */
@SuppressWarnings("unchecked")
public static void restartActorSystem() throws Exception {
if (actorSystem != null) {
// terminateActorSystem() sets forceActorSystemTerminate, so the termination hook of the
// old system logs "Force terminate" instead of triggering a second restart.
scala.concurrent.Future terminatedFuture = terminateActorSystem();
LOGGER.info("actorSystem terminating...");
terminatedFuture.onComplete( new OnComplete(){
@Override
public void onComplete(Throwable failure, Object success) throws Throwable {
// NOTE(review): "failure" is not inspected; the restart proceeds either way.
LOGGER.info("actorSystem terminated, ready to restart actorSystem");
String workerId = WorkerIdGenerator.get();
// Configured host name wins; otherwise fall back to the local host.
String host = ConfigUtil.getWorkerConfig().getString(WorkerConstants.HOSTNAME);
if (host == null) {
host = getLocalHost();
}
// String host = ConfigUtil.getWorkerConfig().getString(WorkerConstants.HOSTNAME, getLocalHost());
int port = ConfigUtil.getWorkerConfig().getInt(WorkerConstants.PORT, 0);
Config akkaConfig = ConfigUtil.getAkkaConfig("akka-worker.conf", host, port);
actorSystem = ActorSystem.create(workerId, akkaConfig);
// Re-arm automatic restart for the new system.
forceActorSystemTerminate = false;
actorSystem.registerOnTermination(new Runnable() {
@Override
public void run() {
try {
// A termination that was not requested through terminateActorSystem() restarts again.
if (!forceActorSystemTerminate) {
LOGGER.info("Restart Actor System.");
restartActorSystem();
} else {
LOGGER.info("Force terminate actor system...");
}
} catch (Exception e) {
LOGGER.error("Restart actorSystem failed.", e);
}
}
});
initActors(actorSystem, workerId);
LOGGER.info("actors inited.");
// Publish the new address through WORKER_ADDR and the "akkaPath" property.
Address address = actorSystem.provider().getDefaultAddress();
WORKER_ADDR = address.host().get() + ":" + address.port().get();
ConfigUtil.getWorkerConfig().setProperty("akkaPath", address.toString());
LOGGER.info("actor system restarted, address={}", address.toString());
groupManager.reset(actorSystem);
// initHeartBeat(workerId);
// LOGGER.info("heartbeat inited.");
INITED = true;
}
// NOTE(review): this argument is evaluated before the callback runs, so it is the
// dispatcher of the system being terminated - confirm the callback still executes on it.
} , actorSystem.dispatcher());
}
}
/**
 * Creates the worker's actor system, registers the automatic-restart hook, publishes the
 * system's address and creates the actors.
 *
 * <p>The host comes from {@code WorkerConstants.HOSTNAME} or, when that is not set, from
 * {@code getLocalHost()}; the port comes from {@code WorkerConstants.PORT} (default 0). The
 * address is stored in {@code WORKER_ADDR} and under the {@code "akkaPath"} property.
 *
 * @return the worker id the actor system was created with
 * @throws Exception if the host cannot be resolved or the actor system or actors cannot be created
 */
@SuppressWarnings("unchecked")
public static String startActorSystem() throws Exception {
    final String workerId = WorkerIdGenerator.get();

    String configuredHost = ConfigUtil.getWorkerConfig().getString(WorkerConstants.HOSTNAME);
    String host = configuredHost != null ? configuredHost : getLocalHost();
    int port = ConfigUtil.getWorkerConfig().getInt(WorkerConstants.PORT, 0);

    Config akkaConfig = ConfigUtil.getAkkaConfig("akka-worker.conf", host, port);
    actorSystem = ActorSystem.create(workerId, akkaConfig);
    forceActorSystemTerminate = false;

    // Restart automatically unless the termination was requested on purpose.
    Runnable onTermination = new Runnable() {
        @Override
        public void run() {
            try {
                if (forceActorSystemTerminate) {
                    LOGGER.info("Force terminate actor system...");
                } else {
                    LOGGER.info("Restart Actor System.");
                    restartActorSystem();
                }
            } catch (Exception e) {
                LOGGER.error("Restart actorSystem failed.", e);
            }
        }
    };
    actorSystem.registerOnTermination(onTermination);

    Address boundAddress = actorSystem.provider().getDefaultAddress();
    WORKER_ADDR = boundAddress.host().get() + ":" + boundAddress.port().get();
    ConfigUtil.getWorkerConfig().setProperty("akkaPath", boundAddress.toString());
    LOGGER.info("actor system started, address={}", boundAddress.toString());

    initActors(actorSystem, workerId);
    LOGGER.info("actors inited.");
    return workerId;
}
/**
 * Requests termination of the current actor system and marks it as intentional by setting
 * {@code forceActorSystemTerminate}.
 *
 * @return the future returned by {@code actorSystem.terminate()}, or null when there is no
 *         actor system
 */
private static scala.concurrent.Future terminateActorSystem() {
    if (actorSystem == null) {
        return null;
    }
    forceActorSystemTerminate = true;
    return actorSystem.terminate();
}
/**
 * Stores the value under {@code WorkerConstants.ENABLE_UNITS} in the worker configuration.
 *
 * @param units value to store
 */
public void setEnableUnits(String units) {
    final String key = WorkerConstants.ENABLE_UNITS;
    ConfigUtil.getWorkerConfig().setProperty(key, units);
}
/**
 * Stores the value under {@code WorkerConstants.ENABLE_SITES} in the worker configuration.
 *
 * @param sites value to store
 */
public void setEnableSites(String sites) {
    final String key = WorkerConstants.ENABLE_SITES;
    ConfigUtil.getWorkerConfig().setProperty(key, sites);
}
/**
 * Stores the value under {@code WorkerConstants.DISABLE_UNITS} in the worker configuration.
 *
 * @param units value to store
 */
public void setDisableUnits(String units) {
    final String key = WorkerConstants.DISABLE_UNITS;
    ConfigUtil.getWorkerConfig().setProperty(key, units);
}
/**
 * Stores the value under {@code WorkerConstants.DISABLE_SITES} in the worker configuration.
 *
 * @param sites value to store
 */
public void setDisableSites(String sites) {
    final String key = WorkerConstants.DISABLE_SITES;
    ConfigUtil.getWorkerConfig().setProperty(key, sites);
}
/**
 * Stores the access key under {@code WorkerConstants.ALIYUN_ACESSKEY} in the worker
 * configuration.
 *
 * @param aliyunAccessKey value to store
 */
public void setAliyunAccessKey(String aliyunAccessKey) {
    final String key = WorkerConstants.ALIYUN_ACESSKEY;
    ConfigUtil.getWorkerConfig().setProperty(key, aliyunAccessKey);
}
/**
 * Stores the secret key under {@code WorkerConstants.ALIYUN_SECRETKEY} in the worker
 * configuration.
 *
 * @param aliyunSecretKey value to store
 */
public void setAliyunSecretKey(String aliyunSecretKey) {
    final String key = WorkerConstants.ALIYUN_SECRETKEY;
    ConfigUtil.getWorkerConfig().setProperty(key, aliyunSecretKey);
}
/**
 * Stores the STS access key under {@code WorkerConstants.STS_ACESSKEY} in the worker
 * configuration.
 *
 * @param stsAccessKey value to store
 */
public void setSTSAccessKey(String stsAccessKey) {
    final String key = WorkerConstants.STS_ACESSKEY;
    ConfigUtil.getWorkerConfig().setProperty(key, stsAccessKey);
}
/**
 * Stores the STS secret key under {@code WorkerConstants.STS_SECRETKEY} in the worker
 * configuration.
 *
 * @param stsSecretKey value to store
 */
public void setSTSSecretKey(String stsSecretKey) {
    final String key = WorkerConstants.STS_SECRETKEY;
    ConfigUtil.getWorkerConfig().setProperty(key, stsSecretKey);
}
/**
 * Stores the STS token under {@code WorkerConstants.STS_TOKEN} in the worker configuration.
 *
 * @param stsSecretToken value to store
 */
public void setSTSSecretToken(String stsSecretToken) {
    final String key = WorkerConstants.STS_TOKEN;
    ConfigUtil.getWorkerConfig().setProperty(key, stsSecretToken);
}
/**
 * Stores the host under {@code WorkerConstants.HOSTNAME} in the worker configuration.
 *
 * @param host value to store
 */
public void setHost(String host) {
    final String key = WorkerConstants.HOSTNAME;
    ConfigUtil.getWorkerConfig().setProperty(key, host);
}
/**
 * Stores the port under {@code WorkerConstants.PORT} in the worker configuration.
 *
 * @param port value to store
 */
public void setPort(int port) {
    final String key = WorkerConstants.PORT;
    ConfigUtil.getWorkerConfig().setProperty(key, port);
}
// public void setHttpConnectionTimeout(long ms) {
// ConfigUtil.getWorkerConfig().setProperty(WorkerConstants.WORKER_HTTP_CONNECTION_TIMEOUT, ms);
// }
/**
 * Assigns the given class loader to the static field {@code CUSTOMER_CLASS_LOADER}.
 *
 * @param userClassLoader class loader to remember
 */
public void setClassLoader(ClassLoader userClassLoader) {
    CUSTOMER_CLASS_LOADER = userClassLoader;
}
/**
 * Sets the namespace with low priority: the value is stored only when no non-blank
 * {@code WorkerConstants.WORKER_NAMESPACE} is configured yet.
 *
 * @param namespace namespace to use as a fallback
 */
public void setNamespace(String namespace) {
    final String key = WorkerConstants.WORKER_NAMESPACE;
    boolean alreadyConfigured = !StringUtils.isBlank(ConfigUtil.getWorkerConfig().getString(key));
    if (alreadyConfigured) {
        return;
    }
    ConfigUtil.getWorkerConfig().setProperty(key, namespace);
}
/**
 * Stores the value under {@code WorkerConstants.WORKER_NAMESPACE_SOURCE} in the worker
 * configuration.
 *
 * @param namespaceSource value to store
 */
public void setNamespaceSource(String namespaceSource) {
    final String key = WorkerConstants.WORKER_NAMESPACE_SOURCE;
    ConfigUtil.getWorkerConfig().setProperty(key, namespaceSource);
}
/**
 * Sets the address-server domain with low priority: the value is stored only when no non-blank
 * {@code WorkerConstants.ADDRESS_SERVER_DOMAIN} is configured yet.
 *
 * <p>The existing value is read with {@code getString}, as the other low-priority setters do,
 * so a non-String property value no longer causes a {@code ClassCastException}.
 *
 * @param endpoint endpoint to use as a fallback
 */
public void setEndpoint(String endpoint) {
    if (StringUtils.isBlank(
        ConfigUtil.getWorkerConfig().getString(WorkerConstants.ADDRESS_SERVER_DOMAIN))) {
        ConfigUtil.getWorkerConfig().setProperty(WorkerConstants.ADDRESS_SERVER_DOMAIN, endpoint);
    }
}
/**
 * Sets the address-server port with low priority: the value is stored (as a string) only when
 * no non-blank {@code WorkerConstants.ADDRESS_SERVER_PORT} is configured yet.
 *
 * <p>The existing value is read with {@code getString}, so a port that was stored as a number
 * no longer causes a {@code ClassCastException}.
 *
 * @param endpointPort port to use as a fallback
 */
public void setEndpointPort(int endpointPort) {
    if (StringUtils.isBlank(
        ConfigUtil.getWorkerConfig().getString(WorkerConstants.ADDRESS_SERVER_PORT))) {
        ConfigUtil.getWorkerConfig().setProperty(WorkerConstants.ADDRESS_SERVER_PORT, String.valueOf(endpointPort));
    }
}
/**
 * Stores the limit under {@code WorkerConstants.TASK_BODY_SIZE_MAX} in the worker
 * configuration.
 *
 * @param maxSize maximum task body size, in bytes
 */
public void setMaxTaskBodySize(int maxSize) {
    final String key = WorkerConstants.TASK_BODY_SIZE_MAX;
    ConfigUtil.getWorkerConfig().setProperty(key, maxSize);
}
/**
 * Stores the flag under {@code WorkerConstants.BLOCK_APP_START} in the worker configuration.
 *
 * @param block value to store
 */
public void setBlockAppStart(boolean block) {
    final String key = WorkerConstants.BLOCK_APP_START;
    ConfigUtil.getWorkerConfig().setProperty(key, block);
}
/**
 * Stores the flag under {@code WorkerConstants.SHARE_CONTAINER_POOL} in the worker
 * configuration.
 *
 * @param shareConatinerPool value to store
 */
public void setShareContainerPool(boolean shareConatinerPool) {
    final String key = WorkerConstants.SHARE_CONTAINER_POOL;
    ConfigUtil.getWorkerConfig().setProperty(key, shareConatinerPool);
}
/**
 * Stores the mode under {@code WorkerConstants.THREAD_POOL_MODE} in the worker configuration.
 *
 * @param mode value to store
 */
public void setThreadPoolMode(String mode) {
    final String key = WorkerConstants.THREAD_POOL_MODE;
    ConfigUtil.getWorkerConfig().setProperty(key, mode);
}
/**
 * Stores the size under {@code WorkerConstants.SHARE_POOL_SIZE} in the worker configuration.
 *
 * @param sharePoolSize value to store
 */
public void setSharePoolSize(int sharePoolSize) {
    final String key = WorkerConstants.SHARE_POOL_SIZE;
    ConfigUtil.getWorkerConfig().setProperty(key, sharePoolSize);
}
/**
 * Stores the size under {@code WorkerConstants.SHARE_POOL_QUEUE_SIZE} in the worker
 * configuration.
 *
 * @param queueSize value to store
 */
public void setSharePoolQueueSize(int queueSize) {
    final String key = WorkerConstants.SHARE_POOL_QUEUE_SIZE;
    ConfigUtil.getWorkerConfig().setProperty(key, queueSize);
}
/**
 * Switches the SLS log collector by storing the flag under
 * {@code WorkerConstants.SLS_COLLECTOR_ENABLE}.
 *
 * <p>Per the original note: with {@code false} the SLS log collector is not used, which may
 * cause OOM; the default is {@code true}.
 *
 * @param enable value to store
 */
public void setSlsCollectorEnable(boolean enable){
    final String key = WorkerConstants.SLS_COLLECTOR_ENABLE;
    ConfigUtil.getWorkerConfig().setProperty(key, enable);
}
/**
 * Stores the label under {@code WorkerConstants.WORKER_LABEL} in the worker configuration.
 *
 * @param label value to store
 */
public void setLabel(String label) {
    final String key = WorkerConstants.WORKER_LABEL;
    ConfigUtil.getWorkerConfig().setProperty(key, label);
}
/**
 * Stores the path under {@code WorkerConstants.WORKER_LABEL_PATH} in the worker configuration.
 *
 * @param labelPath value to store
 */
public void setLabelPath(String labelPath) {
    final String key = WorkerConstants.WORKER_LABEL_PATH;
    ConfigUtil.getWorkerConfig().setProperty(key, labelPath);
}
/**
 * Enables or disables cgroup metrics collection (intended for Docker) by storing the flag
 * under {@code WorkerConstants.CGROUP_MERTRICS_ENABLE}.
 *
 * @param enable value to store
 */
public void setEnableCgroupMetrics(boolean enable) {
    final String key = WorkerConstants.CGROUP_MERTRICS_ENABLE;
    ConfigUtil.getWorkerConfig().setProperty(key, enable);
}
/**
 * Stores the cgroup path prefix under {@code WorkerConstants.CGROUP_PATH_PREFIX}; set it when
 * the path is not {@code /sys/fs/cgroup/cpu/}.
 *
 * @param cgroupPathPrefix value to store
 */
public void setCgroupPathPrefix(String cgroupPathPrefix) {
    final String key = WorkerConstants.CGROUP_PATH_PREFIX;
    ConfigUtil.getWorkerConfig().setProperty(key, cgroupPathPrefix);
}
/**
 * Stores the flag under {@code WorkerConstants.AKKA_REMOTING_AUTO_RECOVER} in the worker
 * configuration.
 *
 * @param autoRecover value to store
 */
public void setAkkaRemotingAutoRecover(boolean autoRecover) {
    final String key = WorkerConstants.AKKA_REMOTING_AUTO_RECOVER;
    ConfigUtil.getWorkerConfig().setProperty(key, autoRecover);
}
/**
 * Stores the flag under {@code WorkerConstants.HEARTBEAT_LOG_ENABLE} in the worker
 * configuration.
 *
 * @param enable value to store
 */
public void setEnableHeartbeatLog(boolean enable) {
    final String key = WorkerConstants.HEARTBEAT_LOG_ENABLE;
    ConfigUtil.getWorkerConfig().setProperty(key, enable);
}
/**
 * Stores the interval under {@code WorkerConstants.Map_MASTER_STATUS_CHECK_INTERVAL} in the
 * worker configuration.
 *
 * @param interval value to store
 */
public void setMapMasterStatusCheckInterval(int interval) {
    final String key = WorkerConstants.Map_MASTER_STATUS_CHECK_INTERVAL;
    ConfigUtil.getWorkerConfig().setProperty(key, interval);
}
/**
 * Stores the flag under {@code WorkerConstants.SECOND_DELAY_INTERVAL_MS_ENABLE} in the worker
 * configuration.
 *
 * @param enable value to store
 */
public void setEnableSecondDelayCycleIntervalMs(boolean enable) {
    final String key = WorkerConstants.SECOND_DELAY_INTERVAL_MS_ENABLE;
    ConfigUtil.getWorkerConfig().setProperty(key, enable);
}
/**
 * Stores the flag under {@code WorkerConstants.MAP_MASTER_FAILOVER_ENABLE} in the worker
 * configuration.
 *
 * @param enable value to store
 */
public void setEnableMapMasterFailover(boolean enable) {
    final String key = WorkerConstants.MAP_MASTER_FAILOVER_ENABLE;
    ConfigUtil.getWorkerConfig().setProperty(key, enable);
}
/**
 * Stores the flag under {@code WorkerConstants.MAP_MASTER_DISPATCH_RANDOM} in the worker
 * configuration.
 *
 * @param enable value to store
 */
public void setMapMasterDispatchRandom(boolean enable) {
    final String key = WorkerConstants.MAP_MASTER_DISPATCH_RANDOM;
    ConfigUtil.getWorkerConfig().setProperty(key, enable);
}
/**
 * Stores the strategy under {@code WorkerConstants.MAP_MASTER_ROUTER_STRATEGY} in the worker
 * configuration.
 *
 * @param strategy value to store
 */
public void setMapMasterRouterStrategy(Integer strategy) {
    final String key = WorkerConstants.MAP_MASTER_ROUTER_STRATEGY;
    ConfigUtil.getWorkerConfig().setProperty(key, strategy);
}
/**
 * Stores the flag under {@code WorkerConstants.SECOND_DELAY_STANDALONE_DISPATCH} in the worker
 * configuration.
 *
 * @param enable value to store
 */
public void setEnableSecondDelayStandaloneDispatch(boolean enable) {
    final String key = WorkerConstants.SECOND_DELAY_STANDALONE_DISPATCH;
    ConfigUtil.getWorkerConfig().setProperty(key, enable);
}
/**
 * Stores the page size under {@code WorkerConstants.WORKER_MAP_PAGE_SIZE} in the worker
 * configuration.
 *
 * @param pageSize value to store
 */
public void setPageSize(int pageSize) {
    final String key = WorkerConstants.WORKER_MAP_PAGE_SIZE;
    ConfigUtil.getWorkerConfig().setProperty(key, pageSize);
}
/**
 * Stores the timeout under {@code WorkerConstants.GRACE_SHUTDOWN_TIMEOUT} in the worker
 * configuration.
 *
 * @param delay value to store
 */
public void setGraceShutdownTimeout(long delay) {
    final String key = WorkerConstants.GRACE_SHUTDOWN_TIMEOUT;
    ConfigUtil.getWorkerConfig().setProperty(key, delay);
}
/**
 * Stores the mode under {@code WorkerConstants.WORKER_SHUTDOWN_MODE} in the worker
 * configuration. Deprecated.
 *
 * @param mode value to store
 */
@Deprecated
public void setWorkerShutdownMode(Integer mode) {
    final String key = WorkerConstants.WORKER_SHUTDOWN_MODE;
    ConfigUtil.getWorkerConfig().setProperty(key, mode);
}
/**
 * Stores the mode under {@code WorkerConstants.GRACE_SHUTDOWN_MODE} in the worker
 * configuration.
 *
 * @param mode value to store
 */
public void setGraceShutdownMode(String mode) {
    final String key = WorkerConstants.GRACE_SHUTDOWN_MODE;
    ConfigUtil.getWorkerConfig().setProperty(key, mode);
}
/**
 * Stores the thread count under {@code WorkerConstants.BROADCAST_DISPATCH_THREAD_NUM} in the
 * worker configuration.
 *
 * @param num value to store
 */
public void setBroadcastDispatchThreadNum(int num) {
    final String key = WorkerConstants.BROADCAST_DISPATCH_THREAD_NUM;
    ConfigUtil.getWorkerConfig().setProperty(key, num);
}
/**
 * Stores the flag under {@code WorkerConstants.BROADCAST_DISPATCH_THREAD_ENABLE} in the worker
 * configuration.
 *
 * @param enable value to store
 */
public void setBroadcastDispatchThreadEnable(Boolean enable) {
    final String key = WorkerConstants.BROADCAST_DISPATCH_THREAD_ENABLE;
    ConfigUtil.getWorkerConfig().setProperty(key, enable);
}
/**
 * Stores the retry count under {@code WorkerConstants.BROADCAST_DISPATCH_RETRY_TIMES} in the
 * worker configuration.
 *
 * @param num value to store
 */
public void setBroadcastDispatchRetryTimes(int num) {
    final String key = WorkerConstants.BROADCAST_DISPATCH_RETRY_TIMES;
    ConfigUtil.getWorkerConfig().setProperty(key, num);
}
/**
 * Stores the flag under {@code WorkerConstants.BROADCAST_MASTER_EXEC_ENABLE} in the worker
 * configuration.
 *
 * @param enable value to store
 */
public void setBroadcastMasterExecEnable(Boolean enable) {
    final String key = WorkerConstants.BROADCAST_MASTER_EXEC_ENABLE;
    ConfigUtil.getWorkerConfig().setProperty(key, enable);
}
/**
 * Stores the map under {@code WorkerConstants.PROCESSOR_THREAD_POOL_SIZE} in the worker
 * configuration.
 *
 * @param processorPoolSize map to store; presumably processor name to pool size - confirm
 */
public void setProcessorPoolSize(Map processorPoolSize) {
    final String key = WorkerConstants.PROCESSOR_THREAD_POOL_SIZE;
    ConfigUtil.getWorkerConfig().setProperty(key, processorPoolSize);
}
/**
 * Stores the H2 user under {@code WorkerConstants.H2_DATABASE_USER_KEY} in the worker
 * configuration.
 *
 * @param h2User value to store
 */
public void setH2DatabaseUser(String h2User) {
    final String key = WorkerConstants.H2_DATABASE_USER_KEY;
    ConfigUtil.getWorkerConfig().setProperty(key, h2User);
}
/**
 * Stores the H2 password under {@code WorkerConstants.H2_DATABASE_PASSWORD_KEY} in the worker
 * configuration.
 *
 * @param h2Password value to store
 */
public void setH2DatabasePassword(String h2Password) {
    final String key = WorkerConstants.H2_DATABASE_PASSWORD_KEY;
    ConfigUtil.getWorkerConfig().setProperty(key, h2Password);
}
/**
 * Stores the flag under {@code WorkerConstants.HTTP_SERVER_ENABLE} in the worker
 * configuration.
 *
 * @param enable value to store
 */
public void setHttpServerEnable(Boolean enable) {
    final String key = WorkerConstants.HTTP_SERVER_ENABLE;
    ConfigUtil.getWorkerConfig().setProperty(key, enable);
}
/**
 * Stores the port under {@code WorkerConstants.HTTP_SERVER_PORT} in the worker configuration.
 *
 * @param port value to store
 */
public void setHttpServerPort(Integer port) {
    final String key = WorkerConstants.HTTP_SERVER_PORT;
    ConfigUtil.getWorkerConfig().setProperty(key, port);
}
/**
 * Stores the percentage under {@code WorkerConstants.MAP_DISK_PERCENT_MAX} in the worker
 * configuration.
 *
 * @param diskPercent value to store
 */
public void setMaxMapDiskPercent(float diskPercent) {
    final String key = WorkerConstants.MAP_DISK_PERCENT_MAX;
    ConfigUtil.getWorkerConfig().setProperty(key, diskPercent);
}
/**
 * Stores the limit under {@code WorkerConstants.HTTP_CLIENT_MAX_TOTAL_CONNECTIONS} in the
 * worker configuration.
 *
 * @param httpClientMaxTotalConnections value to store
 */
public void setHttpClientMaxTotalConnections(Integer httpClientMaxTotalConnections) {
    final String key = WorkerConstants.HTTP_CLIENT_MAX_TOTAL_CONNECTIONS;
    ConfigUtil.getWorkerConfig().setProperty(key, httpClientMaxTotalConnections);
}
/**
 * Forwards every Spring application-context event to {@code elegantStartAndStop}.
 *
 * <p>An earlier, now disabled variant only reacted to the root (parent-less) context on first
 * boot and logged a warning for other contexts; today every event is handled the same way.
 *
 * @param event context event published by Spring
 */
@Override
public void onApplicationEvent(ApplicationContextEvent event) {
    this.elegantStartAndStop(event);
}
/**
 * Reacts to Spring context lifecycle events so that parent/child context applications start
 * and stop cleanly.
 *
 * <p>On a started or refreshed event the event's context becomes {@code SpringContext.context},
 * processor beans are loaded, {@code SpringContext} is unlocked and the heartbeat is
 * initialised. On a stopped or closed event the worker is shut down and {@code SpringContext}
 * is locked. Other events only produce the final lock/unlock counter log line.
 *
 * @param event context event to handle
 * @since 1.0.0
 * @author ChengYu.lyc
 */
private void elegantStartAndStop(ApplicationContextEvent event) {
    final boolean started = event instanceof ContextStartedEvent;
    final boolean refreshed = event instanceof ContextRefreshedEvent;
    final boolean stopped = event instanceof ContextStoppedEvent;
    final boolean closed = event instanceof ContextClosedEvent;

    if (started || refreshed) {
        // "started" takes precedence, matching the order the event types are checked in.
        if (started) {
            LOGGER.warn("SpringApplicationContext={} started and change to {}.", SpringContext.context,
                event.getApplicationContext());
        } else {
            LOGGER.warn("SpringApplicationContext={} refreshed to {}.", SpringContext.context,
                event.getApplicationContext());
        }
        SpringContext.context = event.getApplicationContext();
        loadProcessorBean(SpringContext.context);
        SpringContext.unlock();
        initHeartBeat(WORKER_ID);
        LOGGER.info(started
            ? "[ContextStartedEvent] heartbeat init."
            : "[ContextRefreshedEvent] heartbeat init.");
    } else if (stopped || closed) {
        shutdown(null);
        SpringContext.lock();
        LOGGER.warn(stopped
            ? "SpringApplicationContext={} stopped."
            : "SpringApplicationContext={} closed.", event.getApplicationContext());
    }
    LOGGER.warn("SchedulerxWorker Lock times:{}, unLock times:{}.", SpringContext.lockTimes(), SpringContext.unLockTimes());
}
/**
 * Registers AOP-proxied {@code JobProcessor} beans with {@code SpringContext}.
 *
 * <p>Every {@code JobProcessor} bean of the context is inspected; for those that are AOP
 * proxies, the proxy is registered under its target class through
 * {@code SpringContext.putBeanName}. Beans that are not proxies are left alone.
 *
 * @param context application context to read the processor beans from
 */
private void loadProcessorBean(ApplicationContext context) {
    // Typed map: the raw Map/Map.Entry form cannot assign getValue() to JobProcessor.
    Map<String, JobProcessor> beanMap = context.getBeansOfType(JobProcessor.class);
    // Only the bean instances are needed, not their names.
    for (JobProcessor processor : beanMap.values()) {
        if (AopUtils.isAopProxy(processor)) {
            SpringContext.putBeanName(AopUtils.getTargetClass(processor), processor);
        }
    }
}
}