com.tencent.angel.psagent.PSAgentContext Maven / Gradle / Ivy
/*
* Tencent is pleased to support the open source community by making Angel available.
*
* Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/Apache-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
*/
package com.tencent.angel.psagent;
import com.google.protobuf.ServiceException;
import com.tencent.angel.PartitionKey;
import com.tencent.angel.RunningMode;
import com.tencent.angel.common.location.Location;
import com.tencent.angel.conf.AngelConf;
import com.tencent.angel.exception.AngelException;
import com.tencent.angel.exception.InvalidParameterException;
import com.tencent.angel.ipc.TConnection;
import com.tencent.angel.psagent.client.MasterClient;
import com.tencent.angel.psagent.client.PSControlClientManager;
import com.tencent.angel.psagent.clock.ClockCache;
import com.tencent.angel.psagent.consistency.ConsistencyController;
import com.tencent.angel.psagent.executor.Executor;
import com.tencent.angel.psagent.matrix.MatrixClient;
import com.tencent.angel.psagent.matrix.MatrixClientFactory;
import com.tencent.angel.psagent.matrix.PSAgentLocationManager;
import com.tencent.angel.psagent.matrix.PSAgentMatrixMetaManager;
import com.tencent.angel.psagent.matrix.cache.MatricesCache;
import com.tencent.angel.psagent.matrix.oplog.cache.MatrixOpLogCache;
import com.tencent.angel.psagent.matrix.storage.MatrixStorageManager;
import com.tencent.angel.psagent.matrix.transport.MatrixTransportClient;
import com.tencent.angel.psagent.matrix.transport.adapter.UserRequestAdapter;
import com.tencent.angel.psagent.task.TaskContext;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/**
* Ps agent context, it is used to share information between the all components in ps agent
*/
public class PSAgentContext {
private static final Log LOG = LogFactory.getLog(PSAgentContext.class);
private static PSAgentContext context = new PSAgentContext();
/**
* ps agent
*/
private volatile PSAgent psAgent;
/**
* task id to task context map
*/
private final ConcurrentHashMap taskContexts;
private PSAgentContext() {
taskContexts = new ConcurrentHashMap();
}
/**
* Get the single instance of PSAgentContext
*
* @return PSAgentContext the single instance of PSAgentContext
*/
public static PSAgentContext get() {
return context;
}
/**
* Get ps agent
*
* @return PSAgent
*/
public PSAgent getPsAgent() {
return psAgent;
}
/**
* Set ps agent
*
* @param psAgent ps agent
*/
public void setPsAgent(PSAgent psAgent) {
this.psAgent = psAgent;
}
/**
* Get application configuration
*
* @return Configuration application configuration
*/
public Configuration getConf() {
return psAgent.getConf();
}
/**
* Get ps agent metrics
*
* @return Map ps agent metrics
*/
public Map getMetrics() {
return psAgent.getMetrics();
}
/**
* Get rpc client to master
*
* @return MasterClient rpc client to master
*/
public MasterClient getMasterClient() {
return psAgent.getMasterClient();
}
/**
* Get matrix update cache
*
* @return MatrixOpLogCache matrix update cache
*/
public MatrixOpLogCache getOpLogCache() {
return psAgent.getOpLogCache();
}
/**
* Get rpc client to ps
*
* @return MatrixTransportClient rpc client to ps
*/
public MatrixTransportClient getMatrixTransportClient() {
return psAgent.getMatrixTransportClient();
}
/**
* Get matrix meta manager
*
* @return MatrixMetaManager matrix meta manager
*/
public PSAgentMatrixMetaManager getMatrixMetaManager() {
return psAgent.getMatrixMetaManager();
}
/**
* Get the total task number in the application
*
* @return int the total task number in the application
*/
public int getTotalTaskNum() {
return getConf().getInt(AngelConf.ANGEL_TASK_ACTUAL_NUM, 1);
}
/**
* Get ps location cache
*
* @return LocationCache ps location cache
*/
public PSAgentLocationManager getLocationManager() {
return psAgent.getLocationManager();
}
/**
* Get rpc try interval in milliseconds
*
* @return long rpc try interval in milliseconds
*/
public long getRequestSleepTimeMS() {
return getConf()
.getInt(AngelConf.ANGEL_REQUEST_SLEEP_TIME_MS, AngelConf.DEFAULT_ANGEL_REQUEST_SLEEP_TIME_MS);
}
/**
* Get maximum network bytes being transmitted
*
* @return long maximum network bytes being transmitted
*/
public long getMaxBytesInFlight() {
return getConf().getLong(AngelConf.ANGEL_NETWORK_MAX_BYTES_FLIGHT,
AngelConf.DEFAULT_ANGEL_NETWORK_MAX_BYTES_FLIGHT);
}
/**
* If report clock to master with sync mode
*
* @return true mean use sync mode, false mean use async mode
*/
public boolean syncClockEnable() {
return getConf().getBoolean(AngelConf.ANGEL_PSAGENT_SYNC_CLOCK_ENABLE,
AngelConf.DEFAULT_ANGEL_PSAGENT_SYNC_CLOCK_ENABLE);
}
/**
* Get application running mode
*
* @return RunningMode application running mode
*/
public RunningMode getRunningMode() {
return psAgent.getRunningMode();
}
/**
* Get ps agent location ip
*
* @return String ps agent location ip
*/
public String getIp() {
return psAgent.getIp();
}
/**
* Get SSP staleness value
*
* @return int SSP staleness value
*/
public int getStaleness() {
return getConf().getInt(AngelConf.ANGEL_STALENESS, AngelConf.DEFAULT_ANGEL_STALENESS);
}
/**
* Get task context for a task
*
* @param taskIndex task index
* @return TaskContext task context
*/
public TaskContext getTaskContext(int taskIndex) {
TaskContext context = taskContexts.get(taskIndex);
if (context == null) {
taskContexts.putIfAbsent(taskIndex, new TaskContext(taskIndex));
context = taskContexts.get(taskIndex);
}
return context;
}
/**
* Get SSP consistency controller
*
* @return ConsistencyController SSP consistency controller
*/
public ConsistencyController getConsistencyController() {
return psAgent.getConsistencyController();
}
/**
* Get matrix update cache
*
* @return MatrixOpLogCache matrix update cache
*/
public MatrixOpLogCache getMatrixOpLogCache() {
return psAgent.getOpLogCache();
}
/**
* Get matrix clock cache
*
* @return ClockCache matrix clock cache
*/
public ClockCache getClockCache() {
return psAgent.getClockCache();
}
/**
* Get matrix cache
*
* @return ClockCache matrix cache
*/
public MatricesCache getMatricesCache() {
return psAgent.getMatricesCache();
}
/**
* Get matrix storage manager
*
* @return MatrixStorageManager matrix storage manager
*/
public MatrixStorageManager getMatrixStorageManager() {
return psAgent.getMatrixStorageManager();
}
/**
* Get application layer request adapter
*
* @return MatrixClientAdapter application layer request adapter
*/
public UserRequestAdapter getUserRequestAdapter() {
return psAgent.getUserRequestAdapter();
}
/**
* Get the machine learning executor reference
*
* @return Executor the machine learning executor reference
*/
public Executor getExecutor() {
return psAgent.getExecutor();
}
/**
* Get local task number
*
* @return int local task number
*/
public int getLocalTaskNum() {
return getExecutor().getTaskNum();
}
/**
* Clear context
*/
public void clear() {
MatrixClientFactory.clear();
psAgent = null;
taskContexts.clear();
}
/**
* Get PSAgent id
*
* @return PSAgent id
*/
public int getPSAgentId() {
return psAgent.getId();
}
/**
* Get control connection manager
*
* @return control connection manager
*/
public TConnection getControlConnectManager() {
return psAgent.getControlConnectManager();
}
/**
* Get ps control rpc client manager
*
* @return ps control rpc client manager
*/
public PSControlClientManager getPSControlClientManager() {
return psAgent.getPsControlClientManager();
}
/**
* Get psagent location
*
* @return psagent location
*/
public Location getLocation() {
return psAgent.getLocation();
}
/**
* Global barrier synchronization method
*/
public void barrier(int taskId) throws InvalidParameterException, InterruptedException {
int matrixId = 0;
// clock first
MatrixClient client = MatrixClientFactory.get(matrixId, taskId);
client.clock(false);
int clock = client.getTaskContext().getMatrixClock(matrixId);
// wait
ClockCache cache = PSAgentContext.get().getClockCache();
List pkeys = PSAgentContext.get().getMatrixMetaManager().getPartitions(matrixId);
int syncTimeIntervalMS = PSAgentContext.get().getConf()
.getInt(AngelConf.ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS,
AngelConf.DEFAULT_ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS);
int checkMasterIntervalMs = syncTimeIntervalMS * 50;
long startTs = System.currentTimeMillis();
while (true) {
boolean sync = true;
if (cache.getClock(matrixId, pkeys.get(0)) < clock) {
sync = false;
}
if (!sync) {
Thread.sleep(syncTimeIntervalMS);
} else {
break;
}
if (System.currentTimeMillis() - startTs > checkMasterIntervalMs) {
try {
if (PSAgentContext.get().getMasterClient().getSuccessWorkerGroupNum() >= 1) {
LOG.info("Some Worker run success, do not need wait");
return;
}
} catch (ServiceException e) {
LOG.error("getSuccessWorkerGroupNum from Master falied ", e);
}
startTs = System.currentTimeMillis();
}
}
}
/**
* Get matrix client for rpc
*
* @param matrixId matrix id
* @return matrix client
*/
public MatrixClient getMatrixClient(int matrixId) throws AngelException {
return psAgent.getMatrixClient(matrixId);
}
/**
* Get matrix client for rpc
*
* @param matrixId matrix id
* @param taskIndex task id
* @return matrix client
*/
public MatrixClient getMatrixClient(int matrixId, int taskIndex) throws AngelException {
return psAgent.getMatrixClient(matrixId, taskIndex);
}
/**
* Get matrix client for rpc
*
* @param matrixName matrix name
* @return matrix client
*/
public MatrixClient getMatrixClient(String matrixName) throws AngelException {
return psAgent.getMatrixClient(matrixName);
}
/**
* Get matrix client for rpc
*
* @param matrixName matrix name
* @param taskIndex task id
* @return matrix client
*/
public MatrixClient getMatrixClient(String matrixName, int taskIndex) throws AngelException {
return psAgent.getMatrixClient(matrixName, taskIndex);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy