com.ebay.jetstream.event.processor.hdfs.stats.EventTsBasedSuccessChecker
/*******************************************************************************
* Copyright © 2012-2015 eBay Software Foundation
* This program is dual licensed under the MIT and Apache 2.0 licenses.
* Please see LICENSE for more information.
*******************************************************************************/
package com.ebay.jetstream.event.processor.hdfs.stats;
import java.io.IOException;
import java.io.OutputStream;
import java.text.ParseException;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TimerTask;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.springframework.beans.factory.InitializingBean;
import com.ebay.jetstream.common.ShutDownable;
import com.ebay.jetstream.event.JetstreamEvent;
import com.ebay.jetstream.event.processor.hdfs.HdfsClient;
import com.ebay.jetstream.event.processor.hdfs.PartitionKey;
import com.ebay.jetstream.event.processor.hdfs.resolver.EventTimestampFolderResolver;
import com.ebay.jetstream.event.processor.hdfs.util.DateUtil;
import com.ebay.jetstream.event.processor.hdfs.util.JsonUtil;
import com.ebay.jetstream.event.processor.hdfs.util.MiscUtil;
import com.ebay.jetstream.event.processor.hdfs.util.ZkConnector;
import com.ebay.jetstream.messaging.MessageServiceTimer;
/**
* Records per-partition working time slots and per-file write statistics in ZooKeeper,
* and periodically marks a time-slot folder in HDFS as successful by writing a success
* file containing the aggregated statistics once all partitions have moved past that
* time slot.
*
* @author weifang
*
*/
public class EventTsBasedSuccessChecker extends EventTsBasedStatsRecorder
implements InitializingBean, ShutDownable {
public static final String PATH_ROOT = "/js_hdfs";
public static final String PATH_TIMESLOTS = "timeslots";
public static final String PATH_STATS = "stats";
public static final String TS_PATH_FORMAT = "yyyyMMdd_HHmmss";
public static final String KEY_WORKING_TIMESLOT = "workingTimeSlot";
public static final String KEY_HOST_NAME = "hostName";
public static final String STATS_SUFFIX = ".stats";
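// ZooKeeper node layout used below (see getTimeSlotPath and getStatsPath):
// /js_hdfs/timeslots/<identifier>/<topic>/<dataCenter>/<partition> - working time slot per partition
// /js_hdfs/stats/<identifier>/<yyyyMMdd_HHmmss>/<fileName>.stats - per-file stats of a time slot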
private static final Logger LOGGER = Logger
.getLogger(EventTsBasedSuccessChecker.class.getName());
// injected
private EventTsBasedSuccessCheckerConfig config;
private EventTimestampFolderResolver folderResolver;
private HdfsClient hdfs;
// internal
protected ZkConnector zkConnector;
protected SuccessTask successTask;
public void setConfig(EventTsBasedSuccessCheckerConfig config) {
this.config = config;
}
public void setFolderResolver(EventTimestampFolderResolver folderResolver) {
this.folderResolver = folderResolver;
}
public void setHdfs(HdfsClient hdfs) {
this.hdfs = hdfs;
}
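// Creates the ZooKeeper connector and schedules the periodic success-check task.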
@Override
public void afterPropertiesSet() throws Exception {
zkConnector = new ZkConnector(config.getZkHosts(), //
config.getZkConnectionTimeoutMs(), //
config.getZkSessionTimeoutMs(), //
config.getZkRetryTimes(), //
config.getZkSleepMsBetweenRetries());
successTask = new SuccessTask();
long interval = config.getSuccessCheckInterval();
MessageServiceTimer.sInstance().schedulePeriodicTask(successTask,
interval, interval);
}
@Override
public int getPendingEvents() {
return 0;
}
@Override
public void shutDown() {
if (successTask != null) {
successTask.cancel();
}
if (zkConnector != null) {
zkConnector.close();
}
}
@Override
protected Long getTimestamp(JetstreamEvent event) {
if (folderResolver.getTimestampKey() == null) {
return null;
} else {
return (Long) event.get(folderResolver.getTimestampKey());
}
}
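// Persists the stats of a committed file into ZooKeeper, keyed by the time slot
// parsed from the destination folder name.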
@Override
protected boolean commitStats(PartitionKey key, long startOffset,
long endOffset, String folder, String destFileName, BaseStats stats) {
try {
long timeSlot = DateUtil.parseDate(folder,
folderResolver.getFolderPathFormat()).getTime();
String path = getStatsPath(timeSlot);
path += "/" + destFileName + STATS_SUFFIX;
zkConnector.writeJSON(
path,
genFileStats(key, startOffset, endOffset,
(EventTsBasedStats) stats));
return true;
} catch (ParseException e) {
LOGGER.log(Level.SEVERE, e.getMessage(), e);
return false;
}
}
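// In addition to the base bookkeeping, records the time slot this partition is
// currently writing to.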
@Override
public synchronized void onFilesCreated(PartitionKey key, long startOffset,
String folder, Collection<String> eventTypes, String tmpFileName) {
super.onFilesCreated(key, startOffset, folder, eventTypes, tmpFileName);
try {
writeWorkingTimeSlot(
key,
DateUtil.parseDate(folder,
folderResolver.getFolderPathFormat()).getTime());
} catch (ParseException e) {
throw new RuntimeException(e);
}
}
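// Returns the successCheckCount folder names immediately preceding the minimum working
// time slot; only these folders are candidates for a success check.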
protected List<String> getSuccessCheckFolders() {
List<String> ret = new LinkedList<String>();
long minFolderTs = getMinWorking();
if (minFolderTs == 0) {
LOGGER.log(Level.INFO,
"No working log util now. Wait to check success in next round.");
return ret;
}
long folderTs = minFolderTs;
int checkRange = config.getSuccessCheckCount();
for (int i = 0; i < checkRange; i++) {
folderTs = folderTs - folderResolver.getFolderIntervalInMs();
ret.add(DateUtil.formatDate(folderTs,
folderResolver.getFolderPathFormat()));
}
return ret;
}
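// Scans all configured topics, data centers and partitions and returns the smallest
// working time slot recorded in ZooKeeper, or 0 if none exists yet.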
protected long getMinWorking() {
long minFolderTs = 0;
for (String topic : config.getTotalTopics()) {
String tpath = getTimeSlotPath(topic);
if (!zkConnector.exists(tpath))
continue;
for (String dc : config.getTotalDataCenters()) {
String dcPath = tpath + "/" + dc;
if (!zkConnector.exists(dcPath))
continue;
List<String> pstrs = zkConnector.getChildren(dcPath);
if (pstrs == null)
continue;
for (String pstr : pstrs) {
int partition = Integer.parseInt(pstr);
try {
long workTs = readWorkingTimeSlot(topic, dc, partition);
if (workTs != 0) {
if (minFolderTs == 0)
minFolderTs = workTs;
else if (minFolderTs > workTs)
minFolderTs = workTs;
}
} catch (Exception e) {
continue;
}
}
}
}
return minFolderTs;
}
protected String getTimeSlotPath(String topic) {
StringBuffer sb = new StringBuffer(PATH_ROOT);
sb.append("/").append(PATH_TIMESLOTS).append("/")
.append(config.getIdentifier()).append("/").append(topic);
return sb.toString();
}
protected String getTimeSlotPath(String topic, String dataCenter,
int partition) {
StringBuffer sb = new StringBuffer();
sb.append(getTimeSlotPath(topic)).append("/").append(dataCenter)
.append("/").append(partition);
return sb.toString();
}
protected String getStatsPath(long timeSlot) {
String tsPart = DateUtil.formatDate(timeSlot, TS_PATH_FORMAT);
StringBuffer sb = new StringBuffer(PATH_ROOT);
sb.append("/").append(PATH_STATS).append("/")
.append(config.getIdentifier()).append("/").append(tsPart);
return sb.toString();
}
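// Reads the working time slot recorded for a partition; returns 0 if the node or key
// is missing.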
public long readWorkingTimeSlot(String topic, int partition) {
return readWorkingTimeSlot(topic, config.getDataCenter(), partition);
}
public long readWorkingTimeSlot(String topic, String dataCenter,
int partition) {
String path = getTimeSlotPath(topic, dataCenter, partition);
Map<String, Object> map = zkConnector.readJSON(path);
if (map == null || !map.containsKey(KEY_WORKING_TIMESLOT))
return 0;
String curTsStr = (String) map.get(KEY_WORKING_TIMESLOT);
if (curTsStr != null) {
return Long.valueOf(curTsStr);
}
return 0;
}
public void writeWorkingTimeSlot(PartitionKey key, long curTimeSlot) {
String path = getTimeSlotPath(key.getTopic(), config.getDataCenter(),
key.getPartition());
Map<String, Object> map = new HashMap<String, Object>();
map.put(KEY_HOST_NAME, MiscUtil.getLocalHostName());
map.put(KEY_WORKING_TIMESLOT, String.valueOf(curTimeSlot));
zkConnector.writeJSON(path, map);
}
public String getSuccessPath(String folder) {
return config.getOutputFolder() + "/" + folder + "/"
+ config.getSuccessFileName();
}
protected boolean isSuccess(String folder) {
try {
return hdfs.exist(config.getOutputFolder() + "/" + folder);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
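// Loads the per-file stats of a folder from ZooKeeper; returns null if the success
// file already exists so the caller skips the folder.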
protected Map<String, Map<String, Object>> getFileStats(String folder) {
try {
if (hdfs.exist(getSuccessPath(folder))) {
// already created success file, just return null to let the
// caller skip this folder
return null;
}
long ts = getTimeSlot(folder);
return readFileStats(ts);
} catch (ParseException e) {
LOGGER.log(Level.SEVERE, "Unrecognized folder " + folder, e);
return null;
} catch (IOException e) {
LOGGER.log(Level.SEVERE, e.toString(), e);
return null;
}
}
protected Map<String, Map<String, Object>> readFileStats(long timeSlot) {
String tsPath = getStatsPath(timeSlot);
List<String> children = zkConnector.getChildren(tsPath);
Map<String, Map<String, Object>> ret = new LinkedHashMap<String, Map<String, Object>>();
if (children != null) {
for (String child : children) {
String fullPath = tsPath + "/" + child;
Map<String, Object> chMap = zkConnector.readJSON(fullPath);
ret.put(child, chMap);
}
}
return ret;
}
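// Builds the stats map for a single file. Per-event-type values are flattened to a
// scalar when the file contains only one event type.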
protected Map<String, Object> genFileStats(PartitionKey partitionKey,
long startOffset, long endOffset, EventTsBasedStats stats) {
Map<String, Object> statsMap = new LinkedHashMap<String, Object>();
statsMap.put("hostName", MiscUtil.getLocalHostName());
statsMap.put("topic", partitionKey.getTopic());
statsMap.put("partition", partitionKey.getPartition());
statsMap.put("startOffset", startOffset);
statsMap.put("endOffset", endOffset);
if (stats.getLoadStartTime() != Long.MAX_VALUE)
statsMap.put("loadStartTime", stats.getLoadStartTime());
else
statsMap.put("loadStartTime", 0L);
statsMap.put("loadEndTime", stats.getLoadEndTime());
if (stats.getEventCounts().size() == 1) {
statsMap.put("eventCount", stats.getEventCounts().values()
.iterator().next());
} else {
statsMap.put("eventCount", stats.getEventCounts());
}
if (stats.getErrorCounts().size() == 1) {
statsMap.put("errorCount", stats.getErrorCounts().values()
.iterator().next());
} else {
statsMap.put("errorCount", stats.getErrorCounts());
}
if (stats.getMinTimestamps().size() == 1) {
long v = stats.getMinTimestamps().values().iterator().next();
statsMap.put("minTimestamp", MiscUtil.maxToZero(v));
} else {
Map<String, Long> map = new LinkedHashMap<String, Long>();
for (String key : stats.getMinTimestamps().keySet()) {
map.put(key,
MiscUtil.maxToZero(stats.getMinTimestamps().get(key)));
}
statsMap.put("minTimestamp", map);
}
if (stats.getMaxTimestamps().size() == 1) {
statsMap.put("maxTimestamp", stats.getMaxTimestamps().values()
.iterator().next());
} else {
statsMap.put("maxTimestamp", stats.getMaxTimestamps());
}
if (stats.getTotalLatencies().size() == 1) {
long l = stats.getTotalLatencies().values().iterator().next();
long c = stats.getEventCounts().values().iterator().next();
statsMap.put("avgLatencyInMs", (c == 0) ? 0 : l / c);
} else {
Map<String, Long> map = new LinkedHashMap<String, Long>();
for (String key : stats.getTotalLatencies().keySet()) {
long l = stats.getTotalLatencies().get(key);
long c = stats.getEventCounts().get(key);
map.put(key, (c == 0) ? 0 : l / c);
}
statsMap.put("avgLatencyInMs", map);
}
return statsMap;
}
protected void deleteFileStats(long timeSlot) {
String path = getStatsPath(timeSlot);
try {
List<String> children = zkConnector.getChildren(path);
if (children != null) {
for (String child : children) {
zkConnector.delete(path + "/" + child);
}
}
zkConnector.delete(path);
} catch (Exception e) {
LOGGER.log(Level.SEVERE, "Fail to delete stats for " + path, e);
}
}
protected long getTimeSlot(String folder) throws ParseException {
long ts = DateUtil.parseDate(folder,
folderResolver.getFolderPathFormat()).getTime();
return ts;
}
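// Aggregates per-file stats into folder-level stats: file count, earliest load start
// time, latest load end time, and the raw per-file entries.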
protected void aggregateStats(Map<String, Map<String, Object>> fileStats,
Map<String, Object> aggregatedStats) {
int fileCount = 0;
long firstLoadStartTime = Long.MAX_VALUE;
long lastLoadEndTime = 0;
Number n = null;
for (Entry<String, Map<String, Object>> entry : fileStats.entrySet()) {
Map<String, Object> stats = entry.getValue();
fileCount++;
n = (Number) stats.get("loadStartTime");
if (n != null && firstLoadStartTime > n.longValue()) {
firstLoadStartTime = n.longValue();
}
n = (Number) stats.get("loadEndTime");
if (n != null && lastLoadEndTime < n.longValue()) {
lastLoadEndTime = n.longValue();
}
}
aggregatedStats.put("fileCount", fileCount);
aggregatedStats.put("firstLoadStartTime", firstLoadStartTime);
aggregatedStats.put("lastLoadEndTime", lastLoadEndTime);
aggregatedStats.put("files", fileStats);
}
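// Writes the aggregated stats as the folder's success file in HDFS, then deletes the
// per-file stats from ZooKeeper.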
public void markSuccess(String folder, Map<String, Object> aggStats) {
OutputStream os = null;
try {
long ts = getTimeSlot(folder);
os = hdfs.createFile(getSuccessPath(folder), true);
JsonUtil.mapToJsonStream(aggStats, os);
deleteFileStats(ts);
} catch (ParseException e) {
throw new RuntimeException("Unrecognized folder " + folder, e);
} catch (IOException e) {
throw new RuntimeException(
"Failed to output success file for folder " + folder, e);
} finally {
if (os != null) {
try {
os.close();
} catch (IOException e) {
LOGGER.log(Level.SEVERE, e.toString(), e);
}
}
}
}
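// Periodic task: for every candidate folder that already exists in HDFS and has no
// success file yet, aggregates its per-file stats and writes the success file.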
class SuccessTask extends TimerTask {
@Override
public void run() {
try {
List folders = getSuccessCheckFolders();
for (String folder : folders) {
try {
if (isSuccess(folder)) {
Map<String, Map<String, Object>> fileStats = getFileStats(folder);
if (fileStats != null) {
Map<String, Object> aggregated = new LinkedHashMap<String, Object>();
aggregateStats(fileStats, aggregated);
markSuccess(folder, aggregated);
}
}
} catch (Exception e) {
LOGGER.log(Level.SEVERE,
"Fail to check success for folder " + folder, e);
}
}
} catch (Throwable th) {
LOGGER.log(Level.SEVERE, th.toString(), th);
}
}
}
}