org.apache.sysml.runtime.instructions.spark.functions.SparkListener (systemml: Declarative Machine Learning)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.sysml.runtime.instructions.spark.functions;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;

import org.apache.spark.SparkContext;
import org.apache.spark.scheduler.SparkListenerExecutorMetricsUpdate;
import org.apache.spark.scheduler.SparkListenerStageCompleted;
import org.apache.spark.scheduler.SparkListenerStageSubmitted;
import org.apache.spark.storage.RDDInfo;
import org.apache.spark.ui.jobs.StagesTab;
import org.apache.spark.ui.jobs.UIData.TaskUIData;
import org.apache.spark.ui.scope.RDDOperationGraphListener;

import scala.Option;
import scala.collection.Iterator;
import scala.collection.Seq;
import scala.xml.Node;

import org.apache.sysml.api.MLContext;
import org.apache.sysml.api.MLContextProxy;
import org.apache.sysml.runtime.instructions.spark.SPInstruction;
// Instead of extending org.apache.spark.JavaSparkListener
/**
 * This class is only used by MLContext for now. It collects Spark UI data
 * (stage DAGs, timelines, and per-stage execution times) so that they can be
 * rendered in a Python notebook. A usage sketch follows the class definition.
 */
public class SparkListener extends RDDOperationGraphListener {

    public SparkListener(SparkContext sc) {
        super(sc.conf());
        this._sc = sc;
    }

    // protected SparkExecutionContext sec = null;
    protected SparkContext _sc = null;

    // Instructions currently executing; lets jobs/stages be mapped back to them
    protected Set<SPInstruction> currentInstructions = new HashSet<SPInstruction>();

    // Per-stage task data, collected in onTaskEnd and rendered into the stage timeline
    private HashMap<Integer, ArrayList<TaskUIData>> stageTaskMapping = new HashMap<Integer, ArrayList<TaskUIData>>();

    // Rendered UI fragments and metrics, keyed by stage or job ID
    public HashMap<Integer, Seq<Node>> stageDAGs = new HashMap<Integer, Seq<Node>>();
    public HashMap<Integer, Seq<Node>> stageTimeline = new HashMap<Integer, Seq<Node>>();
    public HashMap<Integer, Seq<Node>> jobDAGs = new HashMap<Integer, Seq<Node>>();
    public HashMap<Integer, Long> stageExecutionTime = new HashMap<Integer, Long>();
    public HashMap<Integer, ArrayList<Integer>> stageRDDMapping = new HashMap<Integer, ArrayList<Integer>>();

    public void addCurrentInstruction(SPInstruction inst) {
        synchronized(currentInstructions) {
            currentInstructions.add(inst);
        }
    }

    public void removeCurrentInstruction(SPInstruction inst) {
        synchronized(currentInstructions) {
            currentInstructions.remove(inst);
        }
    }
    @Override
    public void onExecutorMetricsUpdate(SparkListenerExecutorMetricsUpdate executorMetricsUpdate) {
        super.onExecutorMetricsUpdate(executorMetricsUpdate);
    }

    @Override
    public void onJobEnd(org.apache.spark.scheduler.SparkListenerJobEnd jobEnd) {
        super.onJobEnd(jobEnd);
        int jobID = jobEnd.jobId();
        // Render the job-level DAG visualization and cache it for the notebook UI
        Seq<Node> jobNodes = org.apache.spark.ui.UIUtils.showDagVizForJob(jobID, this.getOperationGraphForJob(jobID));
        jobDAGs.put(jobID, jobNodes);
        // Associate the finished job with every instruction currently executing
        synchronized(currentInstructions) {
            for(SPInstruction inst : currentInstructions) {
                MLContext mlContext = MLContextProxy.getActiveMLContext();
                if(mlContext != null && mlContext.getMonitoringUtil() != null) {
                    mlContext.getMonitoringUtil().setJobId(inst, jobID);
                }
            }
        }
    }
    @Override
    public void onStageSubmitted(SparkListenerStageSubmitted stageSubmitted) {
        super.onStageSubmitted(stageSubmitted);
        Integer stageID = stageSubmitted.stageInfo().stageId();
        synchronized(currentInstructions) {
            stageTaskMapping.put(stageID, new ArrayList<TaskUIData>());
        }

        // Build and cache the stage-level RDD operation graph (DAG visualization)
        Option<org.apache.spark.ui.scope.RDDOperationGraph> rddOpGraph =
            Option.apply(org.apache.spark.ui.scope.RDDOperationGraph.makeOperationGraph(stageSubmitted.stageInfo()));

        // Remember which RDDs participate in this stage
        Iterator<RDDInfo> iter = stageSubmitted.stageInfo().rddInfos().toList().toIterator();
        ArrayList<Integer> rddIDs = new ArrayList<Integer>();
        while(iter.hasNext()) {
            RDDInfo rddInfo = iter.next();
            rddIDs.add(rddInfo.id());
        }
        stageRDDMapping.put(stageID, rddIDs);

        Seq<Node> stageDAG = org.apache.spark.ui.UIUtils.showDagVizForStage(stageID, rddOpGraph);
        stageDAGs.put(stageID, stageDAG);

        // Use org.apache.spark.ui.jobs.StagePage's makeTimeline() method to render the timeline
        ArrayList<TaskUIData> taskUIData = stageTaskMapping.get(stageID);
        Seq<Node> currentStageTimeline = (new org.apache.spark.ui.jobs.StagePage(new StagesTab(_sc.ui().get())))
            .makeTimeline(
                scala.collection.JavaConversions.asScalaBuffer(taskUIData).toList(),
                System.currentTimeMillis());
        stageTimeline.put(stageID, currentStageTimeline);

        // Associate this stage with every instruction currently executing
        synchronized(currentInstructions) {
            for(SPInstruction inst : currentInstructions) {
                MLContext mlContext = MLContextProxy.getActiveMLContext();
                if(mlContext != null && mlContext.getMonitoringUtil() != null) {
                    mlContext.getMonitoringUtil().setStageId(inst, stageID);
                }
            }
        }
    }
    @Override
    public void onTaskEnd(org.apache.spark.scheduler.SparkListenerTaskEnd taskEnd) {
        Integer stageID = taskEnd.stageId();
        synchronized(currentInstructions) {
            if(stageTaskMapping.containsKey(stageID)) {
                Option<String> errorMessage = Option.apply(null); // TODO
                TaskUIData taskData = new TaskUIData(taskEnd.taskInfo(), Option.apply(taskEnd.taskMetrics()), errorMessage);
                stageTaskMapping.get(stageID).add(taskData);
            }
            else {
                // TODO: throw exception
            }
        }
    }
    @Override
    public void onStageCompleted(SparkListenerStageCompleted stageCompleted) {
        super.onStageCompleted(stageCompleted);
        try {
            // submissionTime/completionTime are Option[Long]; record elapsed milliseconds per stage
            long completionTime = Long.parseLong(stageCompleted.stageInfo().completionTime().get().toString());
            long submissionTime = Long.parseLong(stageCompleted.stageInfo().submissionTime().get().toString());
            stageExecutionTime.put(stageCompleted.stageInfo().stageId(), completionTime - submissionTime);
        }
        catch(Exception e) {
            // Ignore stages with missing timing information (Option.get throws if empty)
        }
    }
}
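
For context, here is a minimal usage sketch showing how a listener like this might be registered and its collected data read back once jobs complete. It is illustrative only: the class and variable names below (SparkListenerUsageSketch, sc) are assumptions, not part of the SystemML source, and in SystemML itself the registration and instruction tracking are handled by MLContext. The sketch assumes a Spark 1.x runtime with the web UI enabled, since onStageSubmitted dereferences _sc.ui().get().

// Hypothetical usage sketch; a separate file, not part of SparkListener.java.
import org.apache.spark.SparkContext;
import org.apache.sysml.runtime.instructions.spark.functions.SparkListener;

class SparkListenerUsageSketch {
    public static void main(String[] args) {
        // Assumes a local Spark 1.x context with the web UI enabled
        SparkContext sc = new SparkContext("local[*]", "listener-sketch");
        SparkListener listener = new SparkListener(sc);
        // addSparkListener registers the listener for job/stage/task events
        sc.addSparkListener(listener);

        // ... execute SystemML SPInstructions through MLContext here ...

        // Once jobs have finished, the cached per-stage artifacts can be inspected
        for (Integer stageId : listener.stageExecutionTime.keySet()) {
            System.out.println("Stage " + stageId + " ran for "
                + listener.stageExecutionTime.get(stageId) + " ms");
        }
        sc.stop();
    }
}

A note on the design: the listener stores the already-rendered Seq<Node> HTML fragments in public maps, presumably so a notebook front end can fetch DAG and timeline markup directly from the driver instead of scraping the Spark web UI.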