com.tencent.angel.master.metrics.MetricsService Maven / Gradle / Ivy
/*
* Tencent is pleased to support the open source community by making Angel available.
*
* Copyright (C) 2017-2018 THL A29 Limited, a Tencent company. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
*
* https://opensource.org/licenses/Apache-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*
*/
package com.tencent.angel.master.metrics;
import com.tencent.angel.master.app.AMContext;
import com.tencent.angel.ml.metric.Metric;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.service.AbstractService;
import org.apache.hadoop.yarn.event.EventHandler;
import org.codehaus.jackson.map.ObjectMapper;
import java.io.IOException;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* Algorithm log service. It summarizes all task counters to generate global counters, then
* calculates the algorithm indexes from these global counters. It holds the algorithm indexes in
* memory for the angel client and also writes them to HDFS.
*/
public class MetricsService extends AbstractService implements EventHandler {
static final Log LOG = LogFactory.getLog(MetricsService.class);
/**
* Application context
*/
private final AMContext context;
/**
* Iteration number -> (algorithm metric name -> value)
*/
private final Map> iterToMetricsMap;
/**
* Algorithm metric name to Metric map
*/
private final Map metricsCache;
/**
* Algorithm index calculate thread
*/
private volatile Thread handler;
/**
* Event queue
*/
private final LinkedBlockingDeque eventQueue;
/**
* Stopped the service
*/
private final AtomicBoolean stopped;
/**
* Current iteration number
*/
private volatile int currentIter;
/**
* Log file writter
*/
private volatile DistributeLog logWritter;
private volatile boolean needWriteName;
/**
* LOG format
*/
private static final DecimalFormat df = new DecimalFormat("#0.000000");
/**
* Construct the service.
*/
public MetricsService(AMContext context) {
super("algorithm-metrics-service");
this.context = context;
iterToMetricsMap = new ConcurrentHashMap<>();
metricsCache = new LinkedHashMap<>();
eventQueue = new LinkedBlockingDeque<>();
stopped = new AtomicBoolean(false);
currentIter = 0;
}
/**
* Get current iteration number
*
* @return int current iteration number
*/
public int getCurrentIter() {
return currentIter;
}
/**
* Get algorithm indexes
*
* @param itertionNum iteration number
* @return Map algorithm name to value map
*/
public Map getAlgoMetrics(int itertionNum) {
return iterToMetricsMap.get(itertionNum);
}
@Override protected void serviceInit(Configuration conf) throws Exception {
super.serviceInit(conf);
logWritter = new DistributeLog(conf);
needWriteName = true;
try {
logWritter.init();
} catch (Exception x) {
LOG.error("init log writter failed ", x);
logWritter = null;
}
}
@Override protected void serviceStart() throws Exception {
handler = new Thread() {
@SuppressWarnings("unchecked") @Override public void run() {
MetricsEvent event = null;
while (!stopped.get() && !Thread.currentThread().isInterrupted()) {
try {
event = eventQueue.take();
switch (event.getType()) {
case ALGORITHM_METRICS_UPDATE:
mergeAlgoMetrics(((MetricsUpdateEvent) event).getNameToMetrcMap());
break;
case TASK_ITERATION_UPDATE: {
int minIter = context.getWorkerManager().getMinIteration();
if (minIter > currentIter) {
calAlgoMetrics(minIter);
currentIter = minIter;
context.getModelSaver().epochUpdate(currentIter);
}
break;
}
default:
break;
}
} catch (InterruptedException e) {
if (!stopped.get()) {
LOG.error("algorithm log event handler is interrupted. " + e);
}
return;
} catch (Throwable e) {
LOG.error("algorithm log event handler failed.", e);
}
}
}
};
handler.setName("algo-log-event-handler");
handler.start();
}
@Override protected void serviceStop() throws Exception {
if (stopped.getAndSet(true)) {
return;
}
if (handler != null) {
handler.interrupt();
handler = null;
}
if (logWritter != null) {
try {
logWritter.close();
} catch (IOException e) {
}
logWritter = null;
}
super.serviceStop();
LOG.info("MasterService stopped");
}
private void mergeAlgoMetrics(Map nameToMetricMap) {
for (Map.Entry metricEntry : nameToMetricMap.entrySet()) {
Metric oldMetric = metricsCache.get(metricEntry.getKey());
if (oldMetric == null) {
metricsCache.put(metricEntry.getKey(), metricEntry.getValue());
} else {
oldMetric.merge(metricEntry.getValue());
}
}
}
private void calAlgoMetrics(int epoch) {
LinkedHashMap nameToMetricMap = new LinkedHashMap<>(metricsCache.size());
for (Map.Entry metricEntry : metricsCache.entrySet()) {
nameToMetricMap
.put(metricEntry.getKey(), df.format(Double.valueOf(metricEntry.getValue().toString())));
}
iterToMetricsMap.put(epoch, nameToMetricMap);
metricsCache.clear();
if (logWritter != null) {
try {
List names = new ArrayList<>(nameToMetricMap.size());
for (Map.Entry metricEntry : nameToMetricMap.entrySet()) {
names.add(metricEntry.getKey());
}
logWritter.setNames(names);
if (needWriteName) {
logWritter.writeNames();
needWriteName = false;
}
logWritter.writeLog(nameToMetricMap);
} catch (IOException e) {
LOG.error("write index values to file failed ", e);
}
}
try {
ObjectMapper mapper = new ObjectMapper();
String metrics = mapper.writeValueAsString(nameToMetricMap);
// LOG.info("Epoch=" + epoch + " Metrics=" + metrics);
} catch (Exception e) {
LOG.info("LOG metrics error " + e);
}
}
private String toString(Map metrics) {
StringBuilder sb = new StringBuilder();
for (Map.Entry entry : metrics.entrySet()) {
sb.append("\"").append(entry.getKey()).append("\":");
sb.append(entry.getValue()).append(",");
}
return sb.toString();
}
@Override public void handle(MetricsEvent event) {
if (eventQueue.size() > 10000) {
LOG.warn("There are over " + 10000 + " event in queue, refuse the new event");
return;
}
eventQueue.add(event);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy