All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.tencent.angel.conf.AngelConf Maven / Gradle / Ivy

There is a newer version: 3.2.0
Show newest version
/*
 * Tencent is pleased to support the open source community by making Angel available.
 *
 * Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in 
 * compliance with the License. You may obtain a copy of the License at
 *
 * https://opensource.org/licenses/Apache-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 *
 */


package com.tencent.angel.conf;

import com.tencent.angel.RunningMode;
import com.tencent.angel.data.inputformat.BalanceInputFormat;
import com.tencent.angel.master.AngelApplicationMaster;
import com.tencent.angel.master.slowcheck.TaskCalPerfChecker;
import com.tencent.angel.ps.ParameterServer;
import com.tencent.angel.ps.storage.matrix.PartitionSourceMap;
import com.tencent.angel.psagent.PSAgent;
import com.tencent.angel.psagent.matrix.cache.DefaultPolicy;
import com.tencent.angel.utils.DefaultAppSubmitter;
import com.tencent.angel.worker.Worker;
import com.tencent.angel.worker.task.BaseTask;
import org.apache.hadoop.conf.Configuration;

import java.util.Map;
import java.util.Properties;

/**
 * Angel system parameters.
 */
public class AngelConf extends Configuration {
  public AngelConf(Configuration conf) {
    super(conf);
  }

  public AngelConf() {
    super();
  }

  private static final String ANGEL_DEFAULT_XML_FILE = "angel-default.xml";
  private static final String ANGEL_SITE_XML_FILE = "angel-site.xml";

  private static final String ANGEL_PREFIX = "angel.";
  private static final String ANGEL_AM_PREFIX = "angel.am.";
  private static final String ANGEL_WORKER_PREFIX = "angel.worker.";
  private static final String ANGEL_PS_PREFIX = "angel.ps.";
  private static final String ANGEL_TASK_PREFIX = "angel.task.";
  private static final String ANGEL_WORKERGROUP_PREFIX = "angel.workergroup.";

  // //////////////////////////////
  // Application Configs
  // //////////////////////////////

  /**
   * Task action type. There are two action types now:train and predict.
   * 

* "train" action type means training model use training data. *

* "predict" action type means predict result use model. */ public static final String ANGEL_ACTION_TYPE = "action.type"; public static final String DEFAULT_ANGEL_ACTION_TYPE = "train"; /** * Training data path. */ public static final String ANGEL_TRAIN_DATA_PATH = "angel.train.data.path"; public static final String ANGEL_VALIDATE_DATA_PATH = "angel.validate.data.path"; /** * kerberos config */ public static final String ANGEL_KERBEROS_KEYTAB = "angel.kerberos.keytab"; public static final String ANGEL_KERBEROS_PRINCIPAL = "angel.kerberos.principal"; public static final String ANGEL_KERBEROS_KEYTAB_NAME="angel.kerberos.keytab.name"; /** * Predict data path. */ public static final String ANGEL_PREDICT_DATA_PATH = "angel.predict.data.path"; /** * Input data path use by Angel */ public static final String ANGEL_JOB_INPUT_PATH = "angel.job.input.path"; /** * Training data file format. */ public static final String ANGEL_INPUTFORMAT_CLASS = ANGEL_PREFIX + "input.format"; public static final String DEFAULT_ANGEL_INPUTFORMAT_CLASS = BalanceInputFormat.class.getName(); /** * Predict result output path. If use "predict" action, we need set it. */ public static final String ANGEL_PREDICT_PATH = "angel.predict.out.path"; /** * Serving temp output path. If use "serving" action, we need set it. */ public static final String ANGEL_SERVING_TEMP_PATH = "angel.serving.temp.path"; /** * Serving client type. If use "serving" action, we need set it. */ public static final String ANGEL_SERVING_CLIENT_TYPE = "angel.serving.client.type"; /** * Model save path. This parameter is used in "train" action. */ public static final String ANGEL_SAVE_MODEL_PATH = "angel.save.model.path"; /** * Enable epoch trigger model save */ public static final String ANGEL_SAVE_MODEL_EPOCH_TIGGER_ENABLE = "angel.save.model.epoch.trigger.enable"; public static final boolean DEFAULT_ANGEL_SAVE_MODEL_EPOCH_TIGGER_ENABLE = false; /** * Save model every how many epochs, it is effective only as "angel.save.model.epoch.trigger.enable" is set to true */ public static final String ANGEL_SAVE_MODEL_EVERY_HOWMANY_EPOCHS = "angel.save.model.every.howmany.epochs"; public static final int DEFAULT_ANGEL_SAVE_MODEL_EVERY_HOWMANY_EPOCHS = 1; /** * Log save path. This parameter is used in "train" action, each iteration outputs some algorithm * indicators to this file */ public static final String DEFAULT_METRIC_FORMAT = "%10.6e"; public static final String ANGEL_LOG_PATH = "angel.log.path"; public static final String ANGEL_LOG_FAST_WRITE = "angel.log.fast.write.enable"; public static final Boolean DEFAULT_ANGEL_LOG_FAST_WRITE = true; public static final String ANGEL_LOG_FLUSH_MIN_SIZE = "angel.log.flush.min.size"; public static final int DEFAULT_ANGEL_LOG_FLUSH_MIN_SIZE = 8; /** * Model load path. This parameter is used in both "train" and "predict" actions. In "train" * action, we can load old model from file to implement incremental training. In "predict" action, * we can load model from file to generate predict results. */ public static final String ANGEL_LOAD_MODEL_PATH = "angel.load.model.path"; /** * Application deploy mode, now support YARN and LOCAL mode */ public static final String ANGEL_DEPLOY_MODE = "angel.deploy.mode"; public static final String DEFAULT_ANGEL_DEPLOY_MODE = "YARN"; // YARN, LOCAL /** * Application running mode, now support ANGEL_PS_WORKER and ANGEL_PS. *

* ANGEL_PS_WORKER means startup workers and pss. *

* ANGEL_PS means only startup pss, tasks are executed by a third-party computing system. */ public static final String ANGEL_RUNNING_MODE = ANGEL_PREFIX + "running.mode"; public static final String DEFAULT_ANGEL_RUNNING_MODE = RunningMode.ANGEL_PS_WORKER.toString(); /** * User application jar package */ public static final String ANGEL_JOB_JAR = ANGEL_PREFIX + "job.jar"; public static final String ANGEL_ML_CONF = "angel.ml.conf"; /** * The lib jars used by the Angel application. If the Angel application want to use some third * part jars, it can add the paths of them to the parameter. The paths are separated by commas. */ public static final String ANGEL_JOB_LIBJARS = ANGEL_PREFIX + "job.libjars"; /** * The resource pool of application, it is used by YARN to allocate resources for the application. */ public static final String ANGEL_QUEUE = "queue"; /** * Angel application name. */ public static final String ANGEL_JOB_NAME = ANGEL_PREFIX + "job.name"; public static final String DEFAULT_ANGEL_JOB_NAME = "angel app"; /** * Weather delete the output directory if it exists. */ public static final String ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST = ANGEL_PREFIX + "output.path.deleteonexist"; public static final boolean DEFAULT_ANGEL_JOB_OUTPUT_PATH_DELETEONEXIST = false; /** * Weather delete the stage directory when Angel application exit. */ public static final String ANGEL_JOB_REMOVE_STAGING_DIR_ENABLE = ANGEL_PREFIX + "remove.staging.dir.enable"; public static final boolean DEFAULT_ANGEL_JOB_REMOVE_STAGING_DIR_ENABLE = true; /** * Angel application configuration file. Angel supports submitting a application with a * configuration file, which contains the parameters of the application. */ public static final String ANGEL_APP_CONFIG_FILE = "angel.app.config.file"; /** * Angel application submit class. */ public static final String ANGEL_APP_SUBMIT_CLASS = "angel.app.submit.class"; public static final String DEFAULT_ANGEL_APP_SUBMIT_CLASS = DefaultAppSubmitter.class.getName(); /** * Use to upload files and archives used by the Angel application while submit the application. */ public static final String ANGEL_JOB_CACHE_ARCHIVES = ANGEL_PREFIX + "job.cache.archives"; public static final String ANGEL_JOB_CACHE_FILES = ANGEL_PREFIX + "job.cache.files"; /** * Application view ACL, it is used by YARN. */ public static final String JOB_ACL_VIEW_JOB = ANGEL_PREFIX + "job.acl-view-job"; public static final String DEFAULT_JOB_ACL_VIEW_JOB = " "; /** * Application modify ACL, it is used by YARN. */ public static final String JOB_ACL_MODIFY_JOB = ANGEL_PREFIX + "job.acl-modify-job"; public static final String DEFAULT_JOB_ACL_MODIFY_JOB = ANGEL_PREFIX + "job.acl-modify-job"; /** * The completion cancel token for the application, it is used for YARN. */ public static final String JOB_CANCEL_DELEGATION_TOKEN = ANGEL_PREFIX + "job.complete.cancel.delegation.tokens"; /** * The hostname of machine that sumbits the application. */ public static final String JOB_SUBMITHOST = ANGEL_PREFIX + "job.submit.host"; /** * The ip of machine that submits the application. */ public static final String JOB_SUBMITHOSTADDR = ANGEL_PREFIX + "job.submit.host.address"; /** * Angel application staging directory. */ public static final String ANGEL_STAGING_DIR = ANGEL_PREFIX + "staging.dir"; public static final String DEFAULT_ANGEL_STAGING_DIR = "/tmp/hadoop-yarn/"; /** * The name of user that submits the application. */ public static final String USER_NAME = ANGEL_PREFIX + "submit.user.name"; /** * Angel application directory, it used to stored libjars and resource files. */ public static final String ANGEL_JOB_DIR = ANGEL_PREFIX + "job.dir"; /** * If the application need some resource files, it can use add the file lists to this parameter. * The files separated by commas. */ public static final String ANGEL_APP_USER_RESOURCE_FILES = "angel.app.user.resource.files"; /** * If the application run over, it write final application state to this file, which can be used */ public static final String ANGEL_APP_SERILIZE_STATE_FILE = "angel.app.serilize.state.file"; /** * Angel application output directory, this parameter is used by Angel itself. */ public static final String ANGEL_JOB_OUTPUT_PATH = ANGEL_PREFIX + "output.path"; /** * Angel application temporary result output directory, this parameter is used by Angel itself. */ public static final String ANGEL_JOB_TMP_OUTPUT_PATH_PREFIX = ANGEL_PREFIX + "tmp.output.path.prefix"; /** * Angel application temporary result output directory, this parameter is used by Angel itself. */ public static final String ANGEL_JOB_TMP_OUTPUT_PATH = ANGEL_PREFIX + "tmp.output.path"; /** * The listen port range for all modules:AppMaster, Workers and PSs */ public static final String ANGEL_LISTEN_PORT_RANGE = ANGEL_PREFIX + "listen.port.range"; public static final String DEFAULT_ANGEL_LISTEN_PORT_RANGE = "20000,30000"; /** * Angel application type name, it is used the display name on YARN RM web page. */ public static final String ANGEL_APPLICATION_TYPE = "ANGEL"; /** * Angel application id. */ public static final String ANGEL_JOB_ID = ANGEL_PREFIX + "jobid"; /** * Angel application configuration file. */ public static final String ANGEL_JOB_CONF_FILE = "job.xml"; /** * Local worker directory */ public static final String LOCAL_DIR = "angel.cluster.local.dir"; public static final String ANGEL_CLIENT_HEARTBEAT_INTERVAL_MS = "angel.client.heartbeat.interval.ms"; public static final int DEFAULT_ANGEL_CLIENT_HEARTBEAT_INTERVAL_MS = 5000; public static final String ANGEL_CLIENT_HEARTBEAT_INTERVAL_TIMEOUT_MS = "angel.client.heartbeat.interval.timeout.ms"; public static final int DEFAULT_ANGEL_CLIENT_HEARTBEAT_INTERVAL_TIMEOUT_MS = 30000; // ////////////////////////////// // Master Configs // ////////////////////////////// /** * Memory quota for AppMaster in MB. */ @Deprecated public static final String ANGEL_AM_MEMORY_MB = ANGEL_AM_PREFIX + "memory.mb"; @Deprecated public static final int DEFAULT_ANGEL_AM_MEMORY_MB = 1280; /** * Memory quota for AppMaster in GB. */ public static final String ANGEL_AM_MEMORY_GB = ANGEL_AM_PREFIX + "memory.gb"; public static final int DEFAULT_ANGEL_AM_MEMORY_GB = 2; /** * JVM parameters for AppMaster. */ public static final String ANGEL_AM_JAVA_OPTS = ANGEL_AM_PREFIX + "java.opts"; public static final String DEFAULT_ANGEL_AM_JAVA_OPTS = "-Xmx1024m"; /** * CPU vcore quota for AppMaster. */ public static final String ANGEL_AM_CPU_VCORES = ANGEL_AM_PREFIX + "cpu.vcores"; public static final int DEFAULT_ANGEL_AM_CPU_VCORES = 1; /** * If there is no training data, workers are also started, just for test. */ public static final String ANGEL_AM_USE_DUMMY_DATASPLITER = ANGEL_AM_PREFIX + "use.dummy.dataspliter"; public static final boolean DEFAULT_ANGEL_AM_USE_DUMMY_DATASPLITER = false; /** * The maximum number of times AppMaster can try. */ public static final String ANGEL_AM_MAX_ATTEMPTS = ANGEL_PREFIX + "am.max-attempts"; public static final int DEFAULT_ANGEL_AM_MAX_ATTEMPTS = 2; /** * AppMaster log level for log4j. */ public static final String ANGEL_AM_LOG_LEVEL = ANGEL_AM_PREFIX + "log.level"; public static final String DEFAULT_ANGEL_AM_LOG_LEVEL = "INFO"; /** * AppMaster main class. */ public static final String ANGEL_AM_CLASS = ANGEL_AM_PREFIX + "class"; public static final String DEFAULT_ANGEL_AM_CLASS = AngelApplicationMaster.class.getName(); /** * AppMaster environment variable settings used for system. */ public static final String ANGEL_AM_ADMIN_USER_ENV = ANGEL_AM_PREFIX + "admin.user.env"; public static final String DEFAULT_ANGEL_AM_ADMIN_USER_ENV = ""; /** * AppMaster environment variable settings used for user, it will cover environment variable * settings in ANGEL_AM_ADMIN_USER_ENV. */ public static final String ANGEL_AM_ENV = ANGEL_AM_PREFIX + "env"; public static final String DEFAULT_ANGEL_AM_ENV = ""; /** * Maximum number of threads used in yarn containers launching. */ public static final String ANGEL_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT = ANGEL_AM_PREFIX + "containerlauncher.thread.count"; public static final int DEFAULT_ANGEL_AM_CONTAINERLAUNCHER_THREAD_COUNT_LIMIT = 24; /** * The time interval in milliseconds of AppMaster heartbeat to YARN RM. */ public static final String ANGEL_AM_HEARTBEAT_INTERVAL_MS = ANGEL_AM_PREFIX + "heartbeat.interval.ms"; public static final int DEFAULT_ANGEL_AM_HEARTBEAT_INTERVAL_MS = 5000; /** * The longest allowed time for AppMaster is in the temporary state, it is used to check time out * of control commands from AngelClient. */ public static final String ANGEL_AM_APPSTATE_TIMEOUT_MS = ANGEL_AM_PREFIX + "appstate.timeout.ms"; public static final long DEFAULT_ANGEL_AM_APPSTATE_TIMEOUT_MS = 3000000; /** * The time interval in milliseconds of AppMaster writing application states to hdfs. */ public static final String ANGEL_AM_WRITE_STATE_INTERVAL_MS = ANGEL_AM_PREFIX + "write.state.interval.ms"; public static final int DEFAULT_ANGEL_AM_WRITE_STATE_INTERVAL_MS = 10000; /** * Slow ps/worker check polices */ public static final String ANGEL_AM_SLOW_CHECK_POLICES = ANGEL_AM_PREFIX + "slow.check.polices"; public static final String DEFAULT_ANGEL_AM_SLOW_CHECK_POLICES = TaskCalPerfChecker.class.getName(); /** * Slow ps/worker check enable */ public static final String ANGEL_AM_SLOW_CHECK_ENABLE = ANGEL_AM_PREFIX + "slow.check.enable"; public static final boolean DEFAULT_ANGEL_AM_SLOW_CHECK_ENABLE = false; /** * Slow ps/worker check interval in milliseconds */ public static final String ANGEL_AM_SLOW_CHECK_INTERVAL_MS = ANGEL_AM_PREFIX + "slow.check.interval.ms"; public static final int DEFAULT_ANGEL_AM_SLOW_CHECK_INTERVAL_MS = 60000; /** * Task slowest discount, if a task calculate rate is slow than average rate * discount, the worker * the task is running on will be considered to be a slow worker */ public static final String ANGEL_AM_TASK_SLOWEST_DISCOUNT = ANGEL_AM_PREFIX + "task.slowest.discount"; public static final double DEFAULT_ANGEL_AM_TASK_SLOWEST_DISCOUNT = 0.7; /** * The worker pool size for HDFS operation in Master */ public static final String ANGEL_AM_MATRIX_DISKIO_WORKER_POOL_SIZE = ANGEL_AM_PREFIX + "matrix.diskio.worker.pool.size"; public static final int DEFAULT_ANGEL_AM_MATRIX_DISKIO_WORKER_POOL_SIZE = Math.max(8, (int) (Runtime.getRuntime().availableProcessors() * 0.25)); public static final String ANGEL_MODEL_PARTITIONER_PARTITION_SIZE = "angel.model.partitioner.partition.size"; public static final long DEFAULT_ANGEL_MODEL_PARTITIONER_PARTITION_SIZE = 500000; public static final String ANGEL_MODEL_PARTITIONER_MAX_PARTITION_NUM = "angel.model.partitioner.max.partition.number"; public static final int DEFAULT_ANGEL_MODEL_PARTITIONER_MAX_PARTITION_NUM = 10000; public static final String ANGEL_MODEL_PARTITIONER_PARTITION_NUM_PERSERVER = "angel.model.partitioner.partition.number.perserver"; public static final int DEFAULT_ANGEL_MODEL_PARTITIONER_PARTITION_NUM_PERSERVER = 1; // ////////////////////////////// // Worker Configs // ////////////////////////////// /** * The number of workergroups. It may be adjusted as the training data splits. */ public static final String ANGEL_WORKERGROUP_NUMBER = ANGEL_WORKERGROUP_PREFIX + "number"; public static final int DEFAULT_ANGEL_WORKERGROUP_NUMBER = 1; /** * The number of workergrous that actually startup. Limited by data splitting, the number of data * splits may be not equals to the setting value. Once that happens, the number of workergroups * may be adjusted to an approximate value. */ public static final String ANGEL_WORKERGROUP_ACTUAL_NUM = ANGEL_WORKERGROUP_PREFIX + "actual.number"; /** * The number of workers in a workergroup, now just support a worker in a workergroup. */ public static final String ANGEL_WORKERGROUP_WORKER_NUMBER = ANGEL_WORKERGROUP_PREFIX + "worker.number"; public static final int DEFAULT_ANGEL_WORKERGROUP_WORKER_NUMBER = 1; /** * The number of the tasks run in a worker. */ public static final String ANGEL_WORKER_TASK_NUMBER = ANGEL_WORKER_PREFIX + "task.number"; public static final int DEFAULT_ANGEL_WORKER_TASK_NUMBER = 1; /** * The memory quota for a single worker in MB. */ @Deprecated public static final String ANGEL_WORKER_MEMORY_MB = ANGEL_WORKER_PREFIX + "memory.mb"; @Deprecated public static final int DEFAULT_ANGEL_WORKER_MEMORY_MB = 4096; /** * The memory quota for a single worker in GB. */ public static final String ANGEL_WORKER_MEMORY_GB = ANGEL_WORKER_PREFIX + "memory.gb"; public static final int DEFAULT_ANGEL_WORKER_MEMORY_GB = 4; /** * The CPU vcore quota for a single worker in MB. */ public static final String ANGEL_WORKER_CPU_VCORES = ANGEL_WORKER_PREFIX + "cpu.vcores"; public static final int DEFAULT_ANGEL_WORKER_CPU_VCORES = 1; /*** Worker environment variable settings. */ public static final String ANGEL_WORKER_ENV = ANGEL_WORKER_PREFIX + "env"; public static final String DEFAULT_ANGEL_WORK_ENV = ""; /** * Worker log level for log4j. */ public static final String ANGEL_WORKER_LOG_LEVEL = ANGEL_WORKER_PREFIX + "log.level"; public static final String DEFAULT_ANGEL_WORKER_LOG_LEVEL = "INFO"; /** * Worker JVM parameters. */ public static final String ANGEL_WORKER_JAVA_OPTS = ANGEL_WORKER_PREFIX + "java.opts"; /** * Worker main class. */ public static final String ANGEL_WORKER_CLASS = ANGEL_WORKER_PREFIX + "class"; public static final String DEFAULT_ANGEL_WORKER_CLASS = Worker.class.getName(); /** * Worker resource priority, it use to YARN container allocation. The smaller the priority, the * higher the priority. */ public static final String ANGEL_WORKER_PRIORITY = ANGEL_WORKER_PREFIX + "priority"; public static final int DEFAULT_ANGEL_WORKER_PRIORITY = 20; /** * The maxinum staleness value between tasks, it used to consistency controlling between tasks. 0 * means BSP, bigger then 0 means SSP, -1 means ASYNC. */ public static final String ANGEL_STALENESS = ANGEL_PREFIX + "staleness"; public static final int DEFAULT_ANGEL_STALENESS = 0; /** * The time interval in milliseconds of worker heartbeats to AppMaster. */ public static final String ANGEL_WORKER_HEARTBEAT_INTERVAL_MS = ANGEL_WORKER_PREFIX + "heartbeat.interval.ms"; public static final int DEFAULT_ANGEL_WORKER_HEARTBEAT_INTERVAL = 5000; /** * The maximum time in milliseconds for AppMaster waiting for heartbeats from workers. Once a * worker does not send heartbeat to AppMaster during setting time, it can be considered that the * worker has been down. */ public static final String ANGEL_WORKER_HEARTBEAT_TIMEOUT_MS = ANGEL_WORKER_PREFIX + "heartbeat.timeout.ms"; public static final long DEFAULT_ANGEL_WORKER_HEARTBEAT_TIMEOUT_MS = 600000; public static final String ANGEL_WORKERGROUP_FAILED_TOLERATE = ANGEL_WORKERGROUP_PREFIX + "failed.tolerate"; public static final double DEFAULT_WORKERGROUP_FAILED_TOLERATE = 0.1; public static final String ANGEL_TASK_ERROR_TOLERATE = ANGEL_PREFIX + "task.error.tolerate"; public static final double DEFAULT_ANGEL_TASK_ERROR_TOLERATE = 0.0; /** * The maximum number of times AppMaster can try. */ public static final String ANGEL_WORKER_MAX_ATTEMPTS = ANGEL_WORKER_PREFIX + "max-attempts"; public static final int DEFAULT_WORKER_MAX_ATTEMPTS = 4; public static final String ANGEL_WORKER_JVM_DIRECT_FACTOR_USE_DIRECT_BUFF = ANGEL_WORKER_PREFIX + "jvm.direct.factor.use.direct.buff"; public static final float DEFAULT_ANGEL_WORKER_JVM_DIRECT_FACTOR_USE_DIRECT_BUFF = 0.3f; public static final String ANGEL_WORKER_JVM_DIRECT_FACTOR_USE_HEAP_BUFF = ANGEL_WORKER_PREFIX + "jvm.direct.factor.use.direct.buff"; public static final float DEFAULT_ANGEL_WORKER_JVM_DIRECT_FACTOR_USE_HEAP_BUFF = 0.2f; public static final String ANGEL_WORKER_JVM_YOUNG_FACTOR = ANGEL_WORKER_PREFIX + "jvm.young.factor"; public static final float DEFAULT_ANGEL_WORKER_JVM_YOUNG_FACTOR = 0.4f; /** * The workers number for matrix operations */ public static final String ANGEL_WORKER_MATRIX_EXECUTORS_NUM = ANGEL_WORKER_PREFIX + "matrix.executors.num"; public static final int DEFAULT_ANGEL_WORKER_MATRIX_EXECUTORS_NUM = 16; // ////////////////////////////// // Task Configs // ////////////////////////////// /** * The number of total tasks that actually startup. Limited by data splitting, the number of data * splits may be not equals to the setting value. Once that happens, the number of tasks will be * adjusted to the number of data splits. */ public static final String ANGEL_TASK_ACTUAL_NUM = ANGEL_TASK_PREFIX + "actual.number"; /** * Task iteration number. */ public static final String ANGEL_TASK_ITERATION_NUMBER = ANGEL_TASK_PREFIX + "iteration.number"; public static final int DEFAULT_ANGEL_TASK_ITERATION_NUMBER = 100; /** * The task class that the workers will run, it is set by users. */ public static final String ANGEL_TASK_USER_TASKCLASS = ANGEL_TASK_PREFIX + "user.task.class"; public static final String DEFAULT_ANGEL_TASK_USER_TASKCLASS = BaseTask.class.getName(); /** * The storage level for data blocks. There are three level now:memory, memory_disk, disk. The * default mode is memory_disk. *

* memory:all data blocks are stored in memory, if the worker's memory is large enough, we can use * this level. *

* memory_disk:try to keep all data blocks in memory, if the memory does not fit, put the extra * part into the disk. *

* disk:all data blocks are stored in disk. */ public static final String ANGEL_TASK_DATA_STORAGE_LEVEL = ANGEL_TASK_PREFIX + "data.storage.level"; public static final String DEFAULT_ANGEL_TASK_DATA_STORAGE_LEVEL = "memory_disk"; /** * The read buffer size for reading data from disk. */ public static final String ANGEL_TASK_DISK_READ_BUFFER_SIZE = ANGEL_TASK_PREFIX + "disk.read.buffer.size"; public static final int DEFAULT_ANGEL_TASK_DISK_READ_BUFFER_SIZE = 4 * 1024 * 1024; /** * The maximum allowed memory in MB used in memory_disk level storage for every task. */ public static final String ANGEL_TASK_MEMORYSTORAGE_USE_MAX_MEMORY_MB = ANGEL_TASK_PREFIX + "memory.storage.max.mb"; public static final int DEFAULT_ANGEL_TASK_MEMORYSTORAGE_USE_MAX_MEMORY_MB = 1000; /** * The number of samples used to estimate average size. */ public static final String ANGEL_TASK_ESTIMIZE_SAMPLE_NUMBER = ANGEL_TASK_PREFIX + "estimize.sample.number"; public static final int DEFAULT_ANGEL_TASK_ESTIMIZE_SAMPLE_NUMBER = 100; /** * The write buffer size for writing data to disk. */ public static final String ANGEL_TASK_DISK_WRITE_BUFFER_SIZE = ANGEL_TASK_PREFIX + "writer.buffer.size"; public static final int DEFAULT_ANGEL_TASK_DISK_WRITE_BUFFER_SIZE = 4 * 1024 * 1024; /** * The maximum size in MB of a disk file. */ public static final String ANGEL_TASK_RECORD_FILE_MAXSIZE_MB = ANGEL_TASK_PREFIX + "record.file.maxsize.mb"; public static final int DEFAULT_ANGEL_TASK_RECORD_FILE_MAXSIZE_MB = 1024; // ////////////////////////////// // ParameterServer Configs // ////////////////////////////// /** * The number of ps. */ public static final String ANGEL_PS_NUMBER = ANGEL_PS_PREFIX + "number"; public static final int DEFAULT_ANGEL_PS_NUMBER = 1; /** * The number of ps. */ public static final String ANGEL_PS_HA_REPLICATION_NUMBER = ANGEL_PS_PREFIX + "ha.replication.number"; public static final int DEFAULT_ANGEL_PS_HA_REPLICATION_NUMBER = 1; public static final String ANGEL_PS_HA_USE_EVENT_PUSH = ANGEL_PS_PREFIX + "ha.use.event.push"; public static final boolean DEFAULT_ANGEL_PS_HA_USE_EVENT_PUSH = false; public static final String ANGEL_PS_HA_PUSH_SYNC = ANGEL_PS_PREFIX + "ha.push.sync"; public static final boolean DEFAULT_ANGEL_PS_HA_PUSH_SYNC = true; public static final String ANGEL_PS_HA_PUSH_INTERVAL_MS = ANGEL_PS_PREFIX + "push.interval.ms"; public static final int DEFAULT_ANGEL_PS_HA_PUSH_INTERVAL_MS = 30000; /** * The CPU vcore quota for a single ps. */ public static final String ANGEL_PS_CPU_VCORES = ANGEL_PS_PREFIX + "cpu.vcores"; public static final int DEFAULT_ANGEL_PS_CPU_VCORES = 1; /** * The memory quota for a single worker in MB. */ @Deprecated public static final String ANGEL_PS_MEMORY_MB = ANGEL_PS_PREFIX + "memory.mb"; @Deprecated public static final int DEFAULT_ANGEL_PS_MEMORY_MB = 4096; /** * The memory quota for a single worker in GB. */ public static final String ANGEL_PS_MEMORY_GB = ANGEL_PS_PREFIX + "memory.gb"; public static final int DEFAULT_ANGEL_PS_MEMORY_GB = 4; /** * The time interval in milliseconds of a ps writing the snapshot for matrices to hdfs. */ public static final String ANGEL_PS_BACKUP_INTERVAL_MS = ANGEL_PS_PREFIX + "backup.interval.ms"; public static final int DEFAULT_ANGEL_PS_BACKUP_INTERVAL_MS = 300000; /** * The matrices that need to backup in SnapshotDumper */ public static final String ANGEL_PS_BACKUP_MATRICES = ANGEL_PS_PREFIX + "backup.matrices"; /** * The maximum number of times a ps can retry when run failed. */ public static final String ANGEL_PS_MAX_ATTEMPTS = ANGEL_PS_PREFIX + "max-attempts"; public static final int DEFAULT_PS_MAX_ATTEMPTS = 4; /** * Ps environment variable settings. */ public static final String ANGEL_PS_ENV = ANGEL_PS_PREFIX + "env"; public static final String DEFAULT_ANGEL_PS_ENV = ""; /** * Ps log level for log4j. */ public static final String ANGEL_PS_LOG_LEVEL = ANGEL_PS_PREFIX + "log.level"; public static final String DEFAULT_ANGEL_PS_LOG_LEVEL = "INFO"; /** * Ps JVM parameters. */ public static final String ANGEL_PS_JAVA_OPTS = ANGEL_PS_PREFIX + "child.opts"; /** * Ps main class. */ public static final String ANGEL_PS_CLASS = ANGEL_PS_PREFIX + "class"; public static final String DEFAULT_ANGEL_PS_CLASS = ParameterServer.class.getName(); /** * The time interval in milliseconds of ps heartbeats to AppMaster. */ public static final String ANGEL_PS_HEARTBEAT_INTERVAL_MS = ANGEL_PS_PREFIX + "heartbeat.interval.ms"; public static final int DEFAULT_ANGEL_PS_HEARTBEAT_INTERVAL_MS = 5000; /** * PS HA update sync worker number */ public static final String ANGEL_PS_HA_SYNC_WORKER_NUM = ANGEL_PS_PREFIX + "ha.sync.worker.number"; public static final int DEFAULT_ANGEL_PS_HA_SYNC_WORKER_NUM = Math.max(8, (int) (Runtime.getRuntime().availableProcessors() * 0.25)); /** * PS HA update sync worker send buffer size */ public static final String ANGEL_PS_HA_SYNC_SEND_BUFFER_SIZE = ANGEL_PS_PREFIX + "ha.sync.send.buffer.size"; public static final int DEFAULT_ANGEL_PS_HA_SYNC_SEND_BUFFER_SIZE = 1024 * 1024; /** * Ps resource priority, it use to YARN container allocation. The smaller the priority, the higher * the priority. */ public static final String ANGEL_PS_PRIORITY = ANGEL_PS_PREFIX + "priority"; public static final int DEFAULT_ANGEL_PS_PRIORITY = 10; /** * The maximum time in milliseconds for AppMaster waiting for heartbeats from pss. Once a ps does * not send heartbeat to AppMaster during setting time, it can be considered that the ps has been * down. */ public static final String ANGEL_PS_HEARTBEAT_TIMEOUT_MS = ANGEL_PS_PREFIX + "heartbeat.timeout.ms"; public static final long DEFAULT_ANGEL_PS_HEARTBEAT_TIMEOUT_MS = 600000; /** * PS executors thread pool size */ public static final String ANGEL_PS_MATRIX_DISKIO_WORKER_POOL_SIZE = ANGEL_PS_PREFIX + "matrix.diskio.worker.pool.size"; /** * Default PS executors thread pool size */ public static final int DEFAULT_ANGEL_PS_MATRIX_DISKIO_WORKER_POOL_SIZE = Math.max(16, (int) (Runtime.getRuntime().availableProcessors() * 0.25)); public static final String ANGEL_PS_MAX_PARTITION_NUM_SINGLE_FILE = ANGEL_PS_PREFIX + "max.partition.number.single.file"; public static final int DEFAULT_ANGEL_PS_MAX_PARTITION_NUM_SINGLE_FILE = 100; /** * Server Partition source data Class */ public static final String ANGEL_PS_PARTITION_SOURCE_CLASS = ANGEL_PS_PREFIX + "partition.source.class"; public static final String DEFAULT_ANGEL_PS_PARTITION_SOURCE_CLASS = PartitionSourceMap.class.getName(); /** * partitionClass for matrix */ public static final String Angel_PS_PARTITION_CLASS = ANGEL_PS_PREFIX + "partition.class"; public static final String ANGEL_PS_MAX_LOCK_WAITTIME_MS = ANGEL_PS_PREFIX + "max.lock.waittime"; public static final int DEFAULT_ANGEL_PS_MAX_LOCK_WAITTIME_MS = 10000; public static final String ANGEL_PS_USE_ADAPTIVE_STORAGE_ENABLE = ANGEL_PS_PREFIX + "use.adaptive.storage.enable"; public static final boolean DEFAULT_ANGEL_PS_USE_ADAPTIVE_STORAGE_ENABLE = true; public static final String ANGEL_PS_SPARSE_TO_DENSE_FACTOR = ANGEL_PS_PREFIX + "sparse.to.dense.factor"; public static final float DEFAULT_ANGEL_PS_SPARSE_TO_DENSE_FACTOR = 0.25f; public static final String ANGEL_PS_JVM_DIRECT_FACTOR_USE_DIRECT_BUFF = ANGEL_PS_PREFIX + "jvm.direct.factor.use.direct.buff"; public static final float DEFAULT_ANGEL_PS_JVM_DIRECT_FACTOR_USE_DIRECT_BUFF = 0.45f; public static final String ANGEL_PS_JVM_DIRECT_FACTOR_USE_HEAP_BUFF = ANGEL_PS_PREFIX + "jvm.direct.factor.use.direct.buff"; public static final float DEFAULT_ANGEL_PS_JVM_DIRECT_FACTOR_USE_HEAP_BUFF = 0.25f; public static final String ANGEL_PS_JVM_YOUNG_FACTOR = ANGEL_PS_PREFIX + "jvm.young.factor"; public static final float DEFAULT_ANGEL_PS_JVM_YOUNG_FACTOR = 0.4f; // ////////////////// IPC ////////////////////////// /** * The read buffer size for rpc message encoded by protobuf. */ public static final String ANGEL_REQUEST_PB_READBUFFER_LIMIT = "request.pb.readbuffer.limit"; public static final int DEFAULT_ANGEL_REQUEST_PB_READBUFFER_LIMIT = 64 << 20; /** * The maximum package size for a rpc message. */ public static final String ANGEL_RPC_MAX_PACKAGE_SIZE = "angel.rpc.package.max.size"; public static final int DEFAULT_ANGEL_RPC_MAX_PACKAGE_SIZE = 4 << 20; /** * The maximum time in seconds waiting for a rpc response. */ public static final String ANGEL_READ_TIMEOUT_SEC = "angel.read.timeout.sec"; public static final int DEFAULT_ANGEL_READ_TIMEOUT_SEC = 100; /** * The rpc retry time interval in milliseconds. */ public static final String ANGEL_REQUEST_SLEEP_TIME_MS = "angel.request.sleep.time.ms"; public static final int DEFAULT_ANGEL_REQUEST_SLEEP_TIME_MS = 1000; // ////////////////////////////// // Matrix transfer Configs. // ////////////////////////////// /** * The maximum message size in a matrix transfer rpc. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_MAX_MESSAGE_SIZE = "angel.netty.matrixtransfer.max.message.size"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_MAX_MESSAGE_SIZE = 100 * 1024 * 1024; ; /** * The eventgroup thread number for netty client for matrix transfer. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_EVENTGROUP_THREADNUM = "angel.netty.matrixtransfer.client.eventgroup.threadnum"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_EVENTGROUP_THREADNUM = Runtime.getRuntime().availableProcessors() * 2; /** * The send buffer size for netty client for matrix transfer. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_SNDBUF = "angel.netty.matrixtransfer.client.sndbuf"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_SNDBUF = 1024 * 1024; /** * The receive buffer size for netty client for matrix transfer. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_RCVBUF = "angel.netty.matrixtransfer.client.rcvbuf"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_RCVBUF = 1024 * 1024; /** * The max number of channels to a ps. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_MAX_CONN_PERSERVER = "angel.netty.matrixtransfer.client.max.connect.perserver"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_MAX_CONN_PERSERVER = 5; /** * The min number of channels to a ps. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_MIN_CONN_PERSERVER = "angel.netty.matrixtransfer.client.min.connect.perserver"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_MIN_CONN_PERSERVER = 5; /** * The get channel timeout in milliseconds. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_GET_CONN_TIMEOUT_MS = "angel.netty.matrixtransfer.client.max.get.connect.timeout.ms"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_GET_CONN_TIMEOUT_MS = 10000; /** * The max idle time for a channel */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_MAX_CONN_IDLETIME_MS = "angel.netty.matrixtransfer.client.max.connect.idletime.ms"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_MAX_CONN_IDLETIME_MS = 60000; /** * Netty channel io ratio */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_IORATIO = "angel.netty.matrixtransfer.client.ioratio"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_IORATIO = 50; /** * Netty Channel Type: nio or epoll */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_CHANNEL_TYPE = "angel.netty.matrixtransfer.client.channel.type"; public static final String DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_CHANNEL_TYPE = "nio"; /** * Netty channel io ratio */ public static final String ANGEL_NETTY_MATRIXTRANSFER_SERVER_IORATIO = "angel.netty.matrixtransfer.server.ioratio"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_SERVER_IORATIO = 50; /** * Netty Channel Type: nio or epoll */ public static final String ANGEL_NETTY_MATRIXTRANSFER_SERVER_CHANNEL_TYPE = "angel.netty.matrixtransfer.server.channel.type"; public static final String DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_SERVER_CHANNEL_TYPE = "nio"; /** * The eventgroup thread number for netty server for matrix transfer. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_SERVER_EVENTGROUP_THREADNUM = "angel.netty.matrixtransfer.server.eventgroup.threadnum"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_SERVER_EVENTGROUP_THREADNUM = Runtime.getRuntime().availableProcessors() * 2; /** * The eventgroup thread number for netty server for serving transfer. */ public static final String ANGEL_NETTY_SERVING_TRANSFER_SERVER_EVENTGROUP_THREADNUM = "angel.netty.serving.transfer.server.eventgroup.threadnum"; public static final int DEFAULT_ANGEL_NETTY_SERVING_TRANSFER_SERVER_EVENTGROUP_THREADNUM = Runtime.getRuntime().availableProcessors() * 2; /** * The send buffer size for netty server for matrix transfer. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_SERVER_SNDBUF = "angel.netty.matrixtransfer.server.sndbuf"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_SERVER_SNDBUF = 1024 * 1024; /** * The receive buffer size for netty server for matrix transfer. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_SERVER_RCVBUF = "angel.netty.matrixtransfer.server.rcvbuf"; public static final int DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_SERVER_RCVBUF = 1024 * 1024; /** * The maximum allowed number of matrix transfer requests which are sending to a single * server(ps). It used to flow-control between psagent and ps. */ public static final String ANGEL_MATRIXTRANSFER_MAX_REQUESTNUM_PERSERVER = ANGEL_PREFIX + "matrixtransfer.max.requestnum.perserver"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_MAX_REQUESTNUM_PERSERVER = 16; public static final String ANGEL_MATRIXTRANSFER_CLIENT_REQUESTER_POOL_SIZE = ANGEL_PREFIX + "matrixtransfer.client.requester.pool.size"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_CLIENT_REQUESTER_POOL_SIZE = Math.max(16, (int) (Runtime.getRuntime().availableProcessors() * 0.5)); public static final String ANGEL_MATRIXTRANSFER_CLIENT_RESPONSER_POOL_SIZE = ANGEL_PREFIX + "matrixtransfer.client.responser.pool.size"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_CLIENT_RESPONSER_POOL_SIZE = Math.max(16, (int) (Runtime.getRuntime().availableProcessors() * 0.5)); public static final String ANGEL_MATRIXTRANSFER_SERVER_WORKER_POOL_SIZE = ANGEL_PREFIX + "matrixtransfer.server.worker.pool.size"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_SERVER_WORKER_POOL_SIZE = Runtime.getRuntime().availableProcessors(); public static final String ANGEL_MATRIXTRANSFER_SERVER_TOKEN_TIMEOUT_MS = ANGEL_PREFIX + "matrixtransfer.server.token.timeout.ms"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_SERVER_TOKEN_TIMEOUT_MS = 10000; public static final String ANGEL_MATRIXTRANSFER_SERVER_RPC_LIMIT_FACTOR = ANGEL_PREFIX + "matrixtransfer.server.rpc.limit.factor"; public static final float DEFAULT_ANGEL_MATRIXTRANSFER_SERVER_RPC_LIMIT_FACTOR = 64.0f; public static final String ANGEL_MATRIXTRANSFER_SERVER_RPC_LIMIT_GENERAL_FACTOR = ANGEL_PREFIX + "matrixtransfer.server.rpc.limit.general.factor"; public static float DEFAULT_ANGEL_MATRIXTRANSFER_SERVER_RPC_LIMIT_GENERAL_FACTOR = 0.0f; public static final String ANGEL_MATRIXTRANSFER_SERVER_SENDER_POOL_SIZE = ANGEL_PREFIX + "matrixtransfer.server.sender.pool.size"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_SERVER_SENDER_POOL_SIZE = Math.max(8, (int) (Runtime.getRuntime().availableProcessors() * 0.25)); public static final String ANGEL_MATRIXTRANSFER_SERVER_USE_ASYNC_HANDLER = ANGEL_PREFIX + "matrixtransfer.server.use.async.handler"; public static final boolean DEFAULT_ANGEL_MATRIXTRANSFER_SERVER_USE_ASYNC_HANDLER = true; public static final String ANGEL_MATRIX_OPLOG_MERGER_POOL_SIZE = ANGEL_PREFIX + "matrix.oplog.merger.pool.size"; public static final int DEFAULT_ANGEL_MATRIX_OPLOG_MERGER_POOL_SIZE = Math.max(8, (int) (Runtime.getRuntime().availableProcessors() * 0.25)); /** * The maximum allowed number of matrix transfer requests which are sending to the servers(ps). It * used to flow-control between psagent and ps. */ public static final String ANGEL_MATRIXTRANSFER_MAX_REQUESTNUM = ANGEL_PREFIX + "matrixtransfer.max.requestnum"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_MAX = 1024; /** * The maximum allowed size of requests/responses which are in flight. It used to flow-control * between psagent and ps. */ public static final String ANGEL_NETWORK_MAX_BYTES_FLIGHT = "angel.network.max.bytes.flight"; public static final int DEFAULT_ANGEL_NETWORK_MAX_BYTES_FLIGHT = 1000 * 1024 * 1024; /** * If the requests to a ps continuous failure over limit, the ps may be down, we will re-fetch the * location for the ps from AppMaster. */ public static final String ANGEL_REFRESH_SERVERLOCATION_THRESHOLD = ANGEL_PREFIX + "refresh.serverlocation.threshold"; public static final int DEFAULT_ANGEL_REFRESH_SERVERLOCATION_THRESHOLD = 5; /** * The time interval in milliseconds for failed matrix transfer requests. */ public static final String ANGEL_MATRIXTRANSFER_RETRY_INTERVAL_MS = ANGEL_PREFIX + "matrixtransfer.retry.interval.ms"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_RETRY_INTERVAL_MS = 10000; /** * Max retry number for a PS-RPC */ public static final String ANGEL_MATRIXTRANSFER_MAX_TRY_COUNTER = ANGEL_PREFIX + "matrixtransfer.max.retry.counter"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_MAX_TRY_COUNTER = 10; /** * Weather we need use direct buffer in netty client. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_USEDIRECTBUFFER = "angel.netty.matrixtransfer.client.usedirectbuffer"; public static final boolean DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_USEDIRECTBUFFER = true; /** * Weather we need use pooled buffer in netty server. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_CLIENT_USEPOOL = "angel.netty.matrixtransfer.client.usepool"; public static final boolean DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_CLIENT_USEPOOL = false; /** * Weather we need use direct buffer in netty server. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_SERVER_USEDIRECTBUFFER = "angel.netty.matrixtransfer.server.usedirectbuffer"; public static final boolean DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_SERVER_USEDIRECTBUFFER = true; /** * Weather we need use direct buffer in netty server. */ public static final String ANGEL_NETTY_MATRIXTRANSFER_SERVER_USEPOOL = "angel.netty.matrixtransfer.server.usepool"; public static final boolean DEFAULT_ANGEL_NETTY_MATRIXTRANSFER_SERVER_USEPOOL = false; /** * The maximum time in milliseconds of waiting for the response. If the response of a request is * not received in allowed time, the request will be timeout. The timeout request will be retry * later. */ public static final String ANGEL_MATRIXTRANSFER_REQUEST_TIMEOUT_MS = ANGEL_PREFIX + "matrixtransfer.request.timeout.ms"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_REQUEST_TIMEOUT_MS = 30000; /** * The time interval in milliseconds of clock events. We will check timeout requests and retry * failed requests when clock event happened. */ public static final String ANGEL_MATRIXTRANSFER_CHECK_INTERVAL_MS = ANGEL_PREFIX + "matrixtransfer.check.interval.ms"; public static final int DEFAULT_ANGEL_MATRIXTRANSFER_CHECK_INTERVAL_MS = 100; // ////////////////////////////// // Matrix transfer Configs. // ////////////////////////////// public static final String ANGEL_PSAGENT_PREFIX = "angel.psagent."; /** * PSAgent caches synchronization time interval in milliseconds. The caches contain matrix caches * and clock caches, which synchronize data from pss regularly. Matrix caches are used to cache * matrix data splits. Clock caches are used to cache the clock values of all matrix partitions. */ public static final String ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS = ANGEL_PSAGENT_PREFIX + "cache.sync.timeinterval.ms"; public static final int DEFAULT_ANGEL_PSAGENT_CACHE_SYNC_TIMEINTERVAL_MS = 200; /** * The matrix caches synchronization policy */ public static final String ANGEL_PSAGENT_CACHE_SYNC_POLICY_CLASS = ANGEL_PSAGENT_PREFIX + "sync.policy.class"; public static final String DEFAULT_ANGEL_PSAGENT_CACHE_SYNC_POLICY_CLASS = DefaultPolicy.class.getName(); public static final String ANGEL_PSAGENT_TO_PS_HEARTBEAT_INTERVAL_MS = ANGEL_PSAGENT_PREFIX + "to.ps.heartbeat.interval.ms"; public static final int DEFAULT_ANGEL_PSAGENT_TO_PS_HEARTBEAT_INTERVAL_MS = 5000; public static final String ANGEL_PSAGENT_TO_PS_HEARTBEAT_TIMEOUT_MS = ANGEL_PSAGENT_PREFIX + "to.ps.heartbeat.timeout.ms"; public static final int DEFAULT_ANGEL_PSAGENT_TO_PS_HEARTBEAT_TIMEOUT_MS = 20000; public static final String ANGEL_PSAGENT_UPDATE_SPLIT_ADAPTION_ENABLE = ANGEL_PSAGENT_PREFIX + "update.split.adaption.enable"; public static final boolean DEFAULT_ANGEL_PSAGENT_UPDATE_SPLIT_ADAPTION_ENABLE = true; public static final String ANGEL_PSAGENT_UPDATE_SPLIT_VIEW_ENABLE = ANGEL_PSAGENT_PREFIX + "update.split.view.enable"; public static final boolean DEFAULT_ANGEL_PSAGENT_UPDATE_SPLIT_VIEW_ENABLE = true; /** * The machine addresses on which the pss are expected to run. The addressed are separated by * commas. */ public static final String ANGEL_PS_IP_LIST = ANGEL_PS_PREFIX + "ip.list"; /** * Weather we need update the matrix clock and task iteration to master synchronously. */ public static final String ANGEL_PSAGENT_SYNC_CLOCK_ENABLE = ANGEL_PSAGENT_PREFIX + "sync.clock.enable"; public static final boolean DEFAULT_ANGEL_PSAGENT_SYNC_CLOCK_ENABLE = true; // Configs used to ANGEL_PS_PSAGENT running mode future. public static final String ANGEL_PSAGENT_NUMBER = ANGEL_PSAGENT_PREFIX + "number"; public static final int DEFAULT_ANGEL_PSAGENT_NUMBER = 1; public static final String ANGEL_PSAGENT_MERMORY_MB = ANGEL_PSAGENT_PREFIX + "memory.mb"; public static final int DEFAULT_ANGEL_PSAGENT_MERMORY_MB = 20000; public static final String ANGEL_PSAGENT_CPU_VCORES = ANGEL_PSAGENT_PREFIX + "cpu.vcores"; public static final int DEFAULT_ANGEL_PSAGENT_CPU_VCORES = 5; public static final String ANGEL_PSAGENT_PRIORITY = ANGEL_PSAGENT_PREFIX + "priority"; public static final int DEFAULT_ANGEL_PSAGENT_PRIORITY = 5; public static final String ANGEL_PSAGENT_MAX_ATTEMPTS = ANGEL_PSAGENT_PREFIX + "max.attempts"; public static final int DEFAULT_ANGEL_PSAGENT_MAX_ATTEMPTS = 4; public static final String ANGEL_PSAGENT_ENV = ANGEL_PSAGENT_PREFIX + "env"; public static final String DEFAULT_ANGEL_PSAGENT_ENV = ""; public static final String ANGEL_PSAGNET_LOG_LEVEL = ANGEL_PSAGENT_PREFIX + "log.level"; public static final String DEFAULT_ANGEL_PSAGNET_LOG_LEVEL = "INFO"; public static final String ANGEL_PSAGENT_JAVA_OPTS = ANGEL_PSAGENT_PREFIX + "java.opts"; public static final String ANGEL_PSAGENT_CLASS = ANGEL_PSAGENT_PREFIX + "class"; public static final String DEFAULT_ANGEL_PSAGENT_CLASS = PSAgent.class.getName(); public static final String ANGEL_PSAGENT_IPLIST = ANGEL_PSAGENT_PREFIX + "iplist"; public static final String ANGEL_PSAGENT_HEARTBEAT_TIMEOUT_MS = ANGEL_PSAGENT_PREFIX + "heartbeat.timeout.ms"; public static final long DEFAULT_ANGEL_PSAGENT_HEARTBEAT_TIMEOUT_MS = 60000; // model parse public static final String ANGEL_MODEL_PARSE_THREAD_COUNT = "angel.model.parse.thread.count"; public static final String ANGEL_MODEL_PARSE_NAME = "angel.model.parse.name"; public static final String ANGEL_PARSE_MODEL_PATH = "angel.parse.model.path"; public static final int DEFAULT_ANGEL_MODEL_PARSE_THREAD_COUNT = 5; public static final String ML_CONNECTION_TIMEOUT_MILLIS = "ml.connection.timeout"; /** * If not specified, the default connection timeout will be used (3 sec). */ public static final long DEFAULT_CONNECTION_TIMEOUT_MILLIS = 3 * 1000L; public static final String CONNECTION_READ_TIMEOUT_SEC = "ml.connection.read.timeout"; /** * If not specified, the default connection read timeout will be used (300 sec). */ public static final int DEFAULT_CONNECTION_READ_TIMEOUT_SEC = 60 * 5; public static final int DEFAULT_READ_TIMEOUT_SEC = 10; public static final String SERVER_IO_THREAD = "netty.server.io.threads"; public static final String NETWORK_IO_MODE = "netty.io.mode"; public static final String CLIENT_IO_THREAD = "netty.client.io.threads"; /** * timeout for each RPC */ public static final String ML_RPC_TIMEOUT_KEY = "ml.rpc.timeout"; public static final int DEFAULT_ML_RPC_TIMEOUT = 60000; public static final String ML_CLIENT_RPC_MAXATTEMPTS = "ml.client.rpc.maxattempts"; // Mark whether use pyangel or not. public static final String ANGEL_API_TYPE = "angel.app.type"; public static final String PYANGEL_PYTHON = "angel.pyangel.python"; public static final String PYANGEL_PYFILE = "angel.pyangel.pyfile"; public static final String PYANGEL_PYDEPFILES = "angel.pyangel.pyfile.dependencies"; public static final String ANGEL_PLUGIN_SERVICE_ENABLE = "angel.plugin.service.enable"; public static final String ANGEL_SERVING_SHARDING_NUM = "angel.serving.sharding.num"; public static final String ANGEL_SERVING_SHARDING_CONCURRENT_CAPACITY = "angel.serving.sharding.concurrent.capacity"; public static final String ANGEL_SERVING_SHARDING_MODEL_CLASS = "angel.serving.sharding.model.class"; public static final String ANGEL_SERVING_MASTER_IP = "angel.serving.master.ip"; public static final String ANGEL_SERVING_MASTER_PORT = "angel.serving.master.port"; public static final String ANGEL_SERVING_MODEL_NAME = "angel.serving.model.name"; public static final String ANGEL_SERVING_MODEL_LOAD_TIMEOUT_MINUTE = "angel.serving.model.load.timeout.minute"; public static final String ANGEL_SERVING_MODEL_LOAD_CHECK_INTEVAL_SECOND = "angel.serving.model.load.check.inteval.second"; public static final String ANGEL_SERVING_MODEL_LOAD_TYPE = "angel.serving.model.load.type"; public static final String ANGEL_SERVING_PREDICT_LOCAL_OUTPUT = "angel.serving.predict.local.output"; /** * Default value of {@link #ML_CLIENT_RPC_MAXATTEMPTS}. */ public static final int DEFAULT_ML_CLIENT_RPC_MAXATTEMPTS = 1; public static int[] RETRY_BACKOFF = {1, 1, 1, 2, 2, 4, 4, 8, 16, 32}; public static Configuration create() { Configuration conf = new Configuration(); conf.setClassLoader(AngelConf.class.getClassLoader()); return addAngelResources(conf); } public static Configuration addAngelResources(Configuration conf) { conf.addResource(ANGEL_DEFAULT_XML_FILE); conf.addResource(ANGEL_SITE_XML_FILE); return conf; } public Properties getAngelProps() { return getProps(); } /** * @param that Configuration to clone. * @return the cloned configuration */ public static Configuration clone(final Configuration that) { Configuration conf = new Configuration(that); conf.setClassLoader(AngelConf.class.getClassLoader()); return conf; } /** * Merge two configurations. * * @param destConf the configuration that will be overwritten with items from the srcConf * @param srcConf the source configuration **/ public static void merge(Configuration destConf, Configuration srcConf) { for (Map.Entry e : srcConf) { destConf.set(e.getKey(), e.getValue()); } } public class ANGEL_MODEL_PARTITIONER_DEFAULT_PARTITION_SIZE { } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy