All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.sysml.api.monitoring.SparkMonitoringUtil Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.sysml.api.monitoring;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;

import scala.collection.Seq;
import scala.xml.Node;

import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import org.apache.sysml.lops.Lop;
import org.apache.sysml.runtime.DMLRuntimeException;
import org.apache.sysml.runtime.instructions.Instruction;
import org.apache.sysml.runtime.instructions.spark.SPInstruction;
import org.apache.sysml.runtime.instructions.spark.functions.SparkListener;

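/**
 * Collects per-instruction Spark monitoring data (stage IDs, job IDs, RDD lineage)
 * and exports it as an HTML page or as a JSON document.
 *
 * Usage guide:
 * MLContext mlCtx = new MLContext(sc, true);
 * mlCtx.register...
 * mlCtx.execute(...)
 * mlCtx.getMonitoringUtil().getRuntimeInfoInHTML("runtime.html");
 */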
public class SparkMonitoringUtil {
	// ----------------------------------------------------
	// For VLDB Demo:
	// Bookkeeping maps. "instruction" always means the display string produced by
	// getInstructionString(...). Type arguments are spelled out so that the loops and
	// String/Long accessors elsewhere in this class are type-checked instead of raw.
	private Multimap<Location, String> instructions = TreeMultimap.create();  // script location -> instructions
	private Multimap<String, Integer> stageIDs = TreeMultimap.create();  // instruction -> stageIds
	private Multimap<String, Integer> jobIDs = TreeMultimap.create();  // instruction -> jobIds
	private HashMap<String, String> lineageInfo = new HashMap<String, String>();	// instruction -> lineageInfo
	private HashMap<String, Long> instructionCreationTime = new HashMap<String, Long>();  // instruction -> System.currentTimeMillis() at registration
	
	private Multimap<Integer, String> rddInstructionMapping = TreeMultimap.create();  // rddID -> instructions
	
	/**
	 * Collects the instructions that were registered (see addRDDForInstruction) for
	 * every RDD the listener associates with the given stage.
	 *
	 * @param stageID id of the stage to look up in the listener's stageRDDMapping
	 * @return the matching instruction strings; empty when there is no listener or
	 *         the listener holds no RDD list for this stage
	 */
	private HashSet<String> getRelatedInstructions(int stageID) {
		HashSet<String> retVal = new HashSet<String>();
		if(_sparkListener == null) {
			return retVal;
		}
		ArrayList<Integer> rdds = _sparkListener.stageRDDMapping.get(stageID);
		if(rdds == null) {
			// No RDD list recorded for this stage: nothing to relate, and iterating
			// a null list would throw a NullPointerException.
			return retVal;
		}
		for(Integer rddID : rdds) {
			retVal.addAll(rddInstructionMapping.get(rddID));
		}
		return retVal;
	}
	
	/** Listener whose stage/job maps this class reads; null checks throughout allow it to be absent. */
	private SparkListener _sparkListener = null;

	/**
	 * @return the listener handed to the constructor (possibly null)
	 */
	public SparkListener getSparkListener() {
		return this._sparkListener;
	}
	
	/** Explain text stored through setExplainOutput; starts out as the empty string. */
	private String explainOutput = "";

	/**
	 * @return the most recently stored explain output
	 */
	public String getExplainOutput() {
		return this.explainOutput;
	}

	/**
	 * Stores the explain output so it can be read back with getExplainOutput().
	 *
	 * @param explainOutput text to keep; stored as given, without copying or validation
	 */
	public void setExplainOutput(String explainOutput) {
		this.explainOutput = explainOutput;
	}

	/**
	 * Creates a monitoring utility that reads its stage and job data from the given listener.
	 *
	 * @param sparkListener listener to query; may be null, in which case the
	 *        listener-dependent methods of this class skip their work or return null
	 */
	public SparkMonitoringUtil(SparkListener sparkListener) {
		this._sparkListener = sparkListener;
	}
	
	/**
	 * Forwards the instruction to the listener's addCurrentInstruction.
	 * Does nothing when no listener is attached.
	 *
	 * @param inst instruction to hand to the listener
	 */
	public void addCurrentInstruction(SPInstruction inst) {
		if(_sparkListener == null) {
			return;
		}
		_sparkListener.addCurrentInstruction(inst);
	}
	
	/**
	 * Records that the given RDD belongs to the given instruction.
	 *
	 * @param inst instruction; stored in its getInstructionString(...) form
	 * @param rddID id of the RDD, used as the key in rddInstructionMapping
	 */
	public void addRDDForInstruction(SPInstruction inst, Integer rddID) {
		String instructionText = getInstructionString(inst);
		rddInstructionMapping.put(rddID, instructionText);
	}
	
	/**
	 * Forwards the instruction to the listener's removeCurrentInstruction.
	 * Does nothing when no listener is attached.
	 *
	 * @param inst instruction to hand to the listener
	 */
	public void removeCurrentInstruction(SPInstruction inst) {
		if(_sparkListener == null) {
			return;
		}
		_sparkListener.removeCurrentInstruction(inst);
	}
	
	/**
	 * Sets the DML script text that script locations are resolved against.
	 *
	 * @param dmlStr full text of the DML script
	 */
	public void setDMLString(String dmlStr) {
		this.dmlStrForMonitoring = dmlStr;
		// getExpression/getExpressionInJSON split the script into dmlLines once and
		// then reuse that array; drop it so a new script is not resolved against
		// the previous script's lines.
		this.dmlLines = null;
	}
	
	/**
	 * Clears the listener's stageDAGs and stageTimeline collections when they exist.
	 * Nothing else is reset here; the maps held by this class are left as they are.
	 */
	public void resetMonitoringData() {
		if(_sparkListener == null) {
			return;
		}
		if(_sparkListener.stageDAGs != null) {
			_sparkListener.stageDAGs.clear();
		}
		if(_sparkListener.stageTimeline != null) {
			_sparkListener.stageTimeline.clear();
		}
	}
	
	// public Multimap hops = ArrayListMultimap.create(); TODO:
	private String dmlStrForMonitoring = null;
	// NOTE(review): from this method to the end of the class the text looks damaged by
	// markup stripping: most literals passed to bw.write(...) contain only "\n", one
	// literal runs across several raw lines, generic type arguments are missing, and the
	// remaining members (getInQuotes, getEscapedJSON, fillExecutionTimes, the JSON export,
	// the stage/job helpers, the setters and getInstructionString) are collapsed onto a
	// few very long lines. The code is left untouched here; restore it from the upstream
	// Apache SystemML sources before relying on it.
	/**
	 * Writes the collected runtime information to the file at htmlFilePath.
	 *
	 * For every location key in instructions, the source expression is looked up with
	 * getExpression(loc) and that location's instructions are sorted with an
	 * InstructionComparator built from instructionCreationTime. Output for a location
	 * is written only when the expression is non-null and longer than one character
	 * after trimming. Per instruction, different writes are made depending on whether
	 * the text starts with "SPARK", satisfies isInterestingCP, or neither, and on
	 * whether lineageInfo has an entry for it.
	 *
	 * The writer is closed by a plain bw.close() after the loop; there is no
	 * try/finally, so an IOException from an earlier write skips the close.
	 *
	 * @param htmlFilePath path handed to FileWriter
	 * @throws DMLRuntimeException declared by the signature; presumably kept for callers - TODO confirm
	 * @throws IOException if opening or writing the file fails
	 */
	public void getRuntimeInfoInHTML(String htmlFilePath) throws DMLRuntimeException, IOException {
		String jsAndCSSFiles = ""
				+ ""
				+ ""
				+ ""
				+ ""
				+ ""
				+ ""
				+ ""
				+ ""
				+ ""
				+ ""
				+ ""
				+ " ";
		BufferedWriter bw = new BufferedWriter(new FileWriter(htmlFilePath));
		bw.write("\n");
		bw.write(jsAndCSSFiles + "\n");
		bw.write("\n\n");
		
		bw.write("\n");
		bw.write("\n");
		bw.write("\n");
		bw.write("\n");
		bw.write("\n");
		bw.write("\n");
		bw.write("\n");
		
		for(Location loc : instructions.keySet()) {
			String dml = getExpression(loc);
			
			// Sort the instruction with time - so as to separate recompiled instructions
			List listInst = new ArrayList(instructions.get(loc));
			Collections.sort(listInst, new InstructionComparator(instructionCreationTime));
			
			if(dml != null && dml.trim().length() > 1) {
				bw.write("\n");
				int rowSpan = listInst.size();
				bw.write("\n");
				bw.write("\n");
				boolean firstTime = true;
				for(String inst : listInst) {
					if(!firstTime)
						bw.write("\n");
					
					if(inst.startsWith("SPARK"))
						bw.write("\n");
					else if(isInterestingCP(inst))
						bw.write("\n");
					else
						bw.write("\n");
					
					bw.write("\n");
					if(lineageInfo.containsKey(inst))
						bw.write("\n");
					else
						bw.write("\n");
					
					bw.write("\n");
					firstTime = false;
				}
				
			}
			
		}
		
		bw.write("
Position in scriptDMLInstructionStageIDsRDD Lineage
" + loc.toString() + "" + dml + "
" + inst + "" + inst + "" + inst + "" + getStageIDAsString(inst) + "" + lineageInfo.get(inst).replaceAll("\n", "
") + "
\n"); bw.close(); } private String getInQuotes(String str) { return "\"" + str + "\""; } private String getEscapedJSON(String json) { if(json == null) return ""; else { return json //.replaceAll("\\\\", "\\\\\\") .replaceAll("\\t", "\\\\t") .replaceAll("/", "\\\\/") .replaceAll("\"", "\\\\\"") .replaceAll("\\r?\\n", "\\\\n"); } } private long maxExpressionExecutionTime = 0; HashMap stageExecutionTimes = new HashMap(); HashMap expressionExecutionTimes = new HashMap(); HashMap instructionExecutionTimes = new HashMap(); HashMap> relatedInstructionsPerStage = new HashMap>(); private void fillExecutionTimes() { stageExecutionTimes.clear(); expressionExecutionTimes.clear(); for(Location loc : instructions.keySet()) { List listInst = new ArrayList(instructions.get(loc)); long expressionExecutionTime = 0; if(listInst != null && listInst.size() > 0) { for(String inst : listInst) { long instructionExecutionTime = 0; for(Integer stageId : stageIDs.get(inst)) { try { if(getStageExecutionTime(stageId) != null) { long stageExecTime = getStageExecutionTime(stageId); instructionExecutionTime += stageExecTime; expressionExecutionTime += stageExecTime; stageExecutionTimes.put(stageId, stageExecTime); } } catch(Exception e) {} relatedInstructionsPerStage.put(stageId, getRelatedInstructions(stageId)); } instructionExecutionTimes.put(inst, instructionExecutionTime); } expressionExecutionTime /= listInst.size(); // average } maxExpressionExecutionTime = Math.max(maxExpressionExecutionTime, expressionExecutionTime); expressionExecutionTimes.put(loc.toString(), expressionExecutionTime); } // Now fill empty instructions for(Entry kv : instructionExecutionTimes.entrySet()) { if(kv.getValue() == 0) { // Find all stages that contain this as related instruction long sumExecutionTime = 0; for(Entry> kv1 : relatedInstructionsPerStage.entrySet()) { if(kv1.getValue().contains(kv.getKey())) { sumExecutionTime += stageExecutionTimes.get(kv1.getKey()); } } kv.setValue(sumExecutionTime); } } 
for(Location loc : instructions.keySet()) { if(expressionExecutionTimes.get(loc.toString()) == 0) { List listInst = new ArrayList(instructions.get(loc)); long expressionExecutionTime = 0; if(listInst != null && listInst.size() > 0) { for(String inst : listInst) { expressionExecutionTime += instructionExecutionTimes.get(inst); } } expressionExecutionTime /= listInst.size(); // average maxExpressionExecutionTime = Math.max(maxExpressionExecutionTime, expressionExecutionTime); expressionExecutionTimes.put(loc.toString(), expressionExecutionTime); } } } /** * Useful to avoid passing large String through Py4J * @param fileName * @throws DMLRuntimeException * @throws IOException */ public void saveRuntimeInfoInJSONFormat(String fileName) throws DMLRuntimeException, IOException { String json = getRuntimeInfoInJSONFormat(); BufferedWriter bw = new BufferedWriter(new FileWriter(fileName)); bw.write(json); bw.close(); } public String getRuntimeInfoInJSONFormat() throws DMLRuntimeException, IOException { StringBuilder retVal = new StringBuilder("{\n"); retVal.append(getInQuotes("dml") + ":" + getInQuotes(getEscapedJSON(dmlStrForMonitoring)) + ",\n"); retVal.append(getInQuotes("expressions") + ":" + "[\n"); boolean isFirstExpression = true; fillExecutionTimes(); for(Location loc : instructions.keySet()) { String dml = getEscapedJSON(getExpressionInJSON(loc)); if(dml != null) { // Sort the instruction with time - so as to separate recompiled instructions List listInst = new ArrayList(instructions.get(loc)); Collections.sort(listInst, new InstructionComparator(instructionCreationTime)); if(!isFirstExpression) { retVal.append(",\n"); } retVal.append("{\n"); isFirstExpression = false; retVal.append(getInQuotes("beginLine") + ":" + loc.beginLine + ",\n"); retVal.append(getInQuotes("beginCol") + ":" + loc.beginCol + ",\n"); retVal.append(getInQuotes("endLine") + ":" + loc.endLine + ",\n"); retVal.append(getInQuotes("endCol") + ":" + loc.endCol + ",\n"); long expressionExecutionTime 
= expressionExecutionTimes.get(loc.toString()); retVal.append(getInQuotes("expressionExecutionTime") + ":" + expressionExecutionTime + ",\n"); retVal.append(getInQuotes("expressionHeavyHitterFactor") + ":" + ((double)expressionExecutionTime / (double)maxExpressionExecutionTime) + ",\n"); retVal.append(getInQuotes("expression") + ":" + getInQuotes(dml) + ",\n"); retVal.append(getInQuotes("instructions") + ":" + "[\n"); boolean firstTime = true; for(String inst : listInst) { if(!firstTime) retVal.append(", {"); else retVal.append("{"); if(inst.startsWith("SPARK")) { retVal.append(getInQuotes("isSpark") + ":" + "true,\n"); } else if(isInterestingCP(inst)) { retVal.append(getInQuotes("isInteresting") + ":" + "true,\n"); } retVal.append(getStageIDAsJSONString(inst) + "\n"); if(lineageInfo.containsKey(inst)) { retVal.append(getInQuotes("lineageInfo") + ":" + getInQuotes(getEscapedJSON(lineageInfo.get(inst))) + ",\n"); } retVal.append(getInQuotes("instruction") + ":" + getInQuotes(getEscapedJSON(inst))); retVal.append("}"); firstTime = false; } retVal.append("]\n"); retVal.append("}\n"); } } return retVal.append("]\n}").toString(); } private boolean isInterestingCP(String inst) { if(inst.startsWith("CP rmvar") || inst.startsWith("CP cpvar") || inst.startsWith("CP mvvar")) return false; else if(inst.startsWith("CP")) return true; else return false; } private String getStageIDAsString(String instruction) { String retVal = ""; for(Integer stageId : stageIDs.get(instruction)) { String stageDAG = ""; String stageTimeLine = ""; if(getStageDAGs(stageId) != null) { stageDAG = getStageDAGs(stageId).toString(); } if(getStageTimeLine(stageId) != null) { stageTimeLine = getStageTimeLine(stageId).toString(); } retVal += "Stage:" + stageId + " (" + "
" + stageDAG.replaceAll("toggleDagViz\\(false\\)", "toggleDagViz(false, this)") + "
, " + "
" + stageTimeLine .replaceAll("drawTaskAssignmentTimeline\\(", "registerTimelineData(" + stageId + ", ") .replaceAll("class=\"expand-task-assignment-timeline\"", "class=\"expand-task-assignment-timeline\" onclick=\"toggleStageTimeline(this)\"") + "
" + ")"; } return retVal; } private String getStageIDAsJSONString(String instruction) { long instructionExecutionTime = instructionExecutionTimes.get(instruction); StringBuilder retVal = new StringBuilder(getInQuotes("instructionExecutionTime") + ":" + instructionExecutionTime + ",\n"); boolean isFirst = true; if(stageIDs.get(instruction).size() == 0) { // Find back references HashSet relatedStages = new HashSet(); for(Entry> kv : relatedInstructionsPerStage.entrySet()) { if(kv.getValue().contains(instruction)) { relatedStages.add(kv.getKey()); } } HashSet relatedInstructions = new HashSet(); for(Entry kv : stageIDs.entries()) { if(relatedStages.contains(kv.getValue())) { relatedInstructions.add(kv.getKey()); } } retVal.append(getInQuotes("backReferences") + ": [\n"); boolean isFirstRelInst = true; for(String relInst : relatedInstructions) { if(!isFirstRelInst) { retVal.append(",\n"); } retVal.append(getInQuotes(relInst)); isFirstRelInst = false; } retVal.append("], \n"); } else { retVal.append(getInQuotes("stages") + ": {"); for(Integer stageId : stageIDs.get(instruction)) { String stageDAG = ""; String stageTimeLine = ""; if(getStageDAGs(stageId) != null) { stageDAG = getStageDAGs(stageId).toString(); } if(getStageTimeLine(stageId) != null) { stageTimeLine = getStageTimeLine(stageId).toString(); } long stageExecutionTime = stageExecutionTimes.get(stageId); if(!isFirst) { retVal.append(",\n"); } retVal.append(getInQuotes("" + stageId) + ": {"); // Now add related instructions HashSet relatedInstructions = relatedInstructionsPerStage.get(stageId); retVal.append(getInQuotes("relatedInstructions") + ": [\n"); boolean isFirstRelInst = true; for(String relInst : relatedInstructions) { if(!isFirstRelInst) { retVal.append(",\n"); } retVal.append(getInQuotes(relInst)); isFirstRelInst = false; } retVal.append("],\n"); retVal.append(getInQuotes("DAG") + ":") .append( getInQuotes( getEscapedJSON(stageDAG.replaceAll("toggleDagViz\\(false\\)", "toggleDagViz(false, this)")) ) + 
",\n" ) .append(getInQuotes("stageExecutionTime") + ":" + stageExecutionTime + ",\n") .append(getInQuotes("timeline") + ":") .append( getInQuotes( getEscapedJSON( stageTimeLine .replaceAll("drawTaskAssignmentTimeline\\(", "registerTimelineData(" + stageId + ", ") .replaceAll("class=\"expand-task-assignment-timeline\"", "class=\"expand-task-assignment-timeline\" onclick=\"toggleStageTimeline(this)\"")) ) ) .append("}"); isFirst = false; } retVal.append("}, "); } retVal.append(getInQuotes("jobs") + ": {"); isFirst = true; for(Integer jobId : jobIDs.get(instruction)) { String jobDAG = ""; if(getJobDAGs(jobId) != null) { jobDAG = getJobDAGs(jobId).toString(); } if(!isFirst) { retVal.append(",\n"); } retVal.append(getInQuotes("" + jobId) + ": {") .append(getInQuotes("DAG") + ":" ) .append(getInQuotes( getEscapedJSON(jobDAG.replaceAll("toggleDagViz\\(true\\)", "toggleDagViz(true, this)")) ) + "}\n"); isFirst = false; } retVal.append("}, "); return retVal.toString(); } String [] dmlLines = null; private String getExpression(Location loc) { try { if(dmlLines == null) { dmlLines = dmlStrForMonitoring.split("\\r?\\n"); } if(loc.beginLine == loc.endLine) { return dmlLines[loc.beginLine-1].substring(loc.beginCol-1, loc.endCol); } else { String retVal = dmlLines[loc.beginLine-1].substring(loc.beginCol-1); for(int i = loc.beginLine+1; i < loc.endLine; i++) { retVal += "
" + dmlLines[i-1]; } retVal += "
" + dmlLines[loc.endLine-1].substring(0, loc.endCol); return retVal; } } catch(Exception e) { return null; // "[[" + loc.beginLine + "," + loc.endLine + "," + loc.beginCol + "," + loc.endCol + "]]"; } } private String getExpressionInJSON(Location loc) { try { if(dmlLines == null) { dmlLines = dmlStrForMonitoring.split("\\r?\\n"); } if(loc.beginLine == loc.endLine) { return dmlLines[loc.beginLine-1].substring(loc.beginCol-1, loc.endCol); } else { String retVal = dmlLines[loc.beginLine-1].substring(loc.beginCol-1); for(int i = loc.beginLine+1; i < loc.endLine; i++) { retVal += "\\n" + dmlLines[i-1]; } retVal += "\\n" + dmlLines[loc.endLine-1].substring(0, loc.endCol); return retVal; } } catch(Exception e) { return null; // "[[" + loc.beginLine + "," + loc.endLine + "," + loc.beginCol + "," + loc.endCol + "]]"; } } public Seq getStageDAGs(int stageIDs) { if(_sparkListener == null || _sparkListener.stageDAGs == null) return null; else return _sparkListener.stageDAGs.get(stageIDs); } public Long getStageExecutionTime(int stageID) { if(_sparkListener == null || _sparkListener.stageDAGs == null) return null; else return _sparkListener.stageExecutionTime.get(stageID); } public Seq getJobDAGs(int jobID) { if(_sparkListener == null || _sparkListener.jobDAGs == null) return null; else return _sparkListener.jobDAGs.get(jobID); } public Seq getStageTimeLine(int stageIDs) { if(_sparkListener == null || _sparkListener.stageTimeline == null) return null; else return _sparkListener.stageTimeline.get(stageIDs); } public void setLineageInfo(Instruction inst, String plan) { lineageInfo.put(getInstructionString(inst), plan); } public void setStageId(Instruction inst, int stageId) { stageIDs.put(getInstructionString(inst), stageId); } public void setJobId(Instruction inst, int jobId) { jobIDs.put(getInstructionString(inst), jobId); } public void setInstructionLocation(Location loc, Instruction inst) { String instStr = getInstructionString(inst); instructions.put(loc, instStr); 
instructionCreationTime.put(instStr, System.currentTimeMillis()); } private String getInstructionString(Instruction inst) { String tmp = inst.toString(); tmp = tmp.replaceAll(Lop.OPERAND_DELIMITOR, " "); tmp = tmp.replaceAll(Lop.DATATYPE_PREFIX, "."); tmp = tmp.replaceAll(Lop.INSTRUCTION_DELIMITOR, ", "); return tmp; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy