org.apache.spark.dataflint.jobgroup.JobGroupExtractor.scala
package org.apache.spark.dataflint.jobgroup
import org.apache.spark.SparkConf
import org.apache.spark.dataflint.listener.DatabricksAdditionalExecutionWrapper
import org.apache.spark.dataflint.saas.StageTaskSummary
import org.apache.spark.executor.ExecutorMetrics
import org.apache.spark.internal.config.Status.ASYNC_TRACKING_ENABLED
import org.apache.spark.metrics.ExecutorMetricType
import org.apache.spark.sql.execution.ui.{SQLAppStatusStore, SQLExecutionUIData, SparkPlanGraphWrapper}
import org.apache.spark.status.api.v1.{ExecutorStageSummary, ExecutorSummary}
import org.apache.spark.status.{ExecutorStageSummaryWrapper, _}
import org.apache.spark.util.kvstore.{InMemoryStore, KVStore}
import java.util.Date
import scala.collection.convert.ImplicitConversions.`iterator asScala`
import scala.reflect.{ClassTag, classTag}
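/**
 * Builds a filtered, per-job-group view of a Spark application's status data.
 *
 * `getGroupList` lists the job-group ids recorded in the status store, and `extract` copies the
 * jobs, stages, tasks, executors and SQL executions that belong to a single job group into a
 * fresh in-memory ElementTrackingStore, returning new AppStatusStore / SQLAppStatusStore
 * instances backed by it, along with the group's end time in epoch milliseconds.
 */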
class JobGroupExtractor(store: AppStatusStore, sqlStore: SQLAppStatusStore) {
  private val kvStore = store.store.asInstanceOf[ElementTrackingStore]

  /** Lists the distinct job-group ids of all jobs recorded in the status store. */
  def getGroupList(): Seq[String] = {
    val allJobs = store.jobsList(null)
    allJobs.flatMap(job => job.jobGroup).distinct
  }
  def extract(jobGroupId: String): (AppStatusStore, SQLAppStatusStore, Long) = {
    val allJobs = store.jobsList(null)
    val jobs = allJobs.filter(job => job.jobGroup.contains(jobGroupId))
    val startTime = jobs.flatMap(_.submissionTime).min
    // The group ends when its last job completes, so take the latest completion time.
    val endTime = jobs.flatMap(_.completionTime).max
    val jobsIds = jobs.map(_.jobId)
    val stagesIds = jobs.flatMap(_.stageIds)
    val stages = store.stageList(null).filter(stage => stagesIds.contains(stage.stageId))
    // Flatten the per-stage task maps into a single sequence of task ids.
    val tasksIds = stages.flatMap(_.tasks.map(_.keys.toSeq)).flatten
    val sqlsIds = sqlStore.executionsList().filter(_.jobs.keys.toSeq.exists(jobsIds.contains)).map(_.executionId)
    val rddIds = stages.flatMap(_.rddIds).distinct
    val stageAndAttemptIds = stages.map(stage => Array(stage.stageId, stage.attemptId))
    val stageIds = stages.map(stage => stage.stageId).distinct

    val newStore = new InMemoryStore()
    val conf = new SparkConf().set(ASYNC_TRACKING_ENABLED, false)
    val newTrackingStore = new ElementTrackingStore(newStore, conf)
    // Filter the list of executors to include only those within the given start and end times.
    val executorsInJob = readAll[ExecutorSummaryWrapper].filter { executor =>
      // Get the executor's removeTime in milliseconds, or use Long.MaxValue if removeTime is None.
      val executorRemoveTime = executor.info.removeTime.map(_.getTime).getOrElse(Long.MaxValue)
      // Get the executor's addTime in milliseconds.
      val executorAddTime = executor.info.addTime.getTime
      // Include the executor if:
      // 1. Its removeTime is greater than or equal to the start time, meaning it is active during the period.
      // 2. Its addTime is less than or equal to the end time, meaning it was added before or at the end of the period.
      executorRemoveTime >= startTime.getTime && executorAddTime <= endTime.getTime
    }
    val executorIds = executorsInJob.map(_.info.id)
    val applicationInfo = readAll[ApplicationInfoWrapper].head

    writeAll(newTrackingStore, readAll[ApplicationEnvironmentInfoWrapper])
    writeAll(newTrackingStore, readAll[ResourceProfileWrapper])
    writeAll(newTrackingStore, Seq(new AppSummary(jobsIds.length, stageAndAttemptIds.length)))
    // Rewrite the application info so that its attempts span exactly the job group's time window.
    writeAll(newTrackingStore, Seq(new ApplicationInfoWrapper(
      applicationInfo.info.copy(attempts =
        applicationInfo.info.attempts.map(_.copy(
          startTime = startTime,
          endTime = endTime,
          lastUpdated = endTime,
          duration = endTime.getTime - startTime.getTime,
          completed = true)))
    )))
    val executorStageSummaryWrapper = readAll[ExecutorStageSummaryWrapper]
      .filter(executorStageSummaryWrapper => stageAndAttemptIds.exists(stageAndAttemptId =>
        executorStageSummaryWrapper.stageId == stageAndAttemptId(0) &&
          executorStageSummaryWrapper.stageAttemptId == stageAndAttemptId(1)
      ))
    writeAll(newTrackingStore, executorStageSummaryWrapper)
    writeAll(newTrackingStore, rebuildExecutorSummaries(executorsInJob, executorStageSummaryWrapper, startTime, endTime))
    writeAll(newTrackingStore, readAll[StreamBlockData].filter(block => executorIds.contains(block.executorId)))
    writeAll(newTrackingStore, jobsIds.map(jobId => kvStore.read(classOf[JobDataWrapper], jobId)))
    writeAll(newTrackingStore, stageAndAttemptIds.map(stageAndAttemptId => kvStore.read(classOf[StageDataWrapper], stageAndAttemptId)))
    writeAll(newTrackingStore, readAllIfExists[ExecutorStageSummaryWrapper](stageAndAttemptIds))
    writeAll(newTrackingStore, readAllIfExists[StageTaskSummary](stageAndAttemptIds))
    writeAll(newTrackingStore, readAllIfExists[RDDStorageInfoWrapper](rddIds.asInstanceOf[Seq[Object]]))
    writeAll(newTrackingStore, tasksIds.map(taskId => kvStore.read(classOf[TaskDataWrapper], taskId)))
    writeAll(newTrackingStore, stageIds.map(stageId => kvStore.read(classOf[RDDOperationGraphWrapper], stageId)))
    writeAll(newTrackingStore, readAll[PoolData].filter(_.stageIds.exists(stageIds.contains)))
    writeAll(newTrackingStore, sqlsIds.map(sqlId => kvStore.read(classOf[SQLExecutionUIData], sqlId)))
    writeAll(newTrackingStore, sqlsIds.map(sqlId => kvStore.read(classOf[SparkPlanGraphWrapper], sqlId)))
    writeAll(newTrackingStore, readAllIfExists[DatabricksAdditionalExecutionWrapper](sqlsIds.asInstanceOf[Seq[Object]]))

    (new AppStatusStore(newTrackingStore), new SQLAppStatusStore(newTrackingStore), endTime.toInstant.toEpochMilli)
  }
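  /**
   * Re-derives executor-level summaries from the per-stage executor summaries of the extracted
   * stages, so that task counts, I/O, shuffle and spill totals reflect only this job group, and
   * clamps each executor's add/remove time to the group's start/end window.
   */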
  def rebuildExecutorSummaries(
      executorsInJob: Seq[ExecutorSummaryWrapper],
      executorStageSummaries: Seq[ExecutorStageSummaryWrapper],
      startTime: Date,
      endTime: Date
  ): Seq[ExecutorSummaryWrapper] = {
    // Helper method to sum metrics for a given executor ID
    def sumMetricsForExecutor(executorId: String): ExecutorStageSummary = {
      // Filter stage summaries by executor ID
      val filteredMetrics = executorStageSummaries.filter(_.executorId == executorId)
      // Aggregate metrics for the executor
      filteredMetrics.foldLeft(
        new ExecutorStageSummary(
          taskTime = 0L,
          failedTasks = 0,
          succeededTasks = 0,
          killedTasks = 0,
          inputBytes = 0L,
          inputRecords = 0L,
          outputBytes = 0L,
          outputRecords = 0L,
          shuffleRead = 0L,
          shuffleReadRecords = 0L,
          shuffleWrite = 0L,
          shuffleWriteRecords = 0L,
          memoryBytesSpilled = 0L,
          diskBytesSpilled = 0L,
          isBlacklistedForStage = false,
          peakMemoryMetrics = None,
          isExcludedForStage = false
        )
      ) { (acc, wrapper) =>
        val metrics = wrapper.info
        // Combine peakMemoryMetrics by taking the max of each metric
        val combinedPeakMemoryMetrics = (acc.peakMemoryMetrics, metrics.peakMemoryMetrics) match {
          case (Some(accMetrics), Some(newMetrics)) =>
            val combinedMetricsMap = ExecutorMetricType.metricToOffset.map { case (metric, _) =>
              metric -> Math.max(
                accMetrics.getMetricValue(metric),
                newMetrics.getMetricValue(metric)
              )
            }
            Some(new ExecutorMetrics(combinedMetricsMap.toMap))
          case (None, newMetrics) => newMetrics
          case (accMetrics, None) => accMetrics
        }
        new ExecutorStageSummary(
          taskTime = acc.taskTime + metrics.taskTime,
          failedTasks = acc.failedTasks + metrics.failedTasks,
          succeededTasks = acc.succeededTasks + metrics.succeededTasks,
          killedTasks = acc.killedTasks + metrics.killedTasks,
          inputBytes = acc.inputBytes + metrics.inputBytes,
          inputRecords = acc.inputRecords + metrics.inputRecords,
          outputBytes = acc.outputBytes + metrics.outputBytes,
          outputRecords = acc.outputRecords + metrics.outputRecords,
          shuffleRead = acc.shuffleRead + metrics.shuffleRead,
          shuffleReadRecords = acc.shuffleReadRecords + metrics.shuffleReadRecords,
          shuffleWrite = acc.shuffleWrite + metrics.shuffleWrite,
          shuffleWriteRecords = acc.shuffleWriteRecords + metrics.shuffleWriteRecords,
          memoryBytesSpilled = acc.memoryBytesSpilled + metrics.memoryBytesSpilled,
          diskBytesSpilled = acc.diskBytesSpilled + metrics.diskBytesSpilled,
          isBlacklistedForStage = acc.isBlacklistedForStage || metrics.isBlacklistedForStage,
          peakMemoryMetrics = combinedPeakMemoryMetrics,
          isExcludedForStage = acc.isExcludedForStage || metrics.isExcludedForStage
        )
      }
    }
    // Rebuild executor summaries with aggregated metrics
    executorsInJob.map { executorWrapper =>
      val executorId = executorWrapper.info.id
      val aggregatedMetrics = sumMetricsForExecutor(executorId)
      // Adjust addTime and removeTime based on startTime and endTime
      val adjustedAddTime = if (executorWrapper.info.addTime.before(startTime)) startTime else executorWrapper.info.addTime
      val adjustedRemoveTime = executorWrapper.info.removeTime match {
        case Some(removeTime) if removeTime.after(endTime) => Some(endTime)
        case other => other
      }
      // Create a new ExecutorSummary with updated metrics and adjusted times
      val updatedExecutorSummary = new ExecutorSummary(
        id = executorWrapper.info.id,
        hostPort = executorWrapper.info.hostPort,
        isActive = executorWrapper.info.isActive,
        rddBlocks = executorWrapper.info.rddBlocks,
        memoryUsed = executorWrapper.info.memoryUsed,
        diskUsed = executorWrapper.info.diskUsed,
        totalCores = executorWrapper.info.totalCores,
        maxTasks = executorWrapper.info.maxTasks,
        activeTasks = executorWrapper.info.activeTasks,
        failedTasks = aggregatedMetrics.failedTasks,
        completedTasks = aggregatedMetrics.succeededTasks,
        totalTasks = aggregatedMetrics.succeededTasks + aggregatedMetrics.failedTasks,
        totalDuration = aggregatedMetrics.taskTime,
        totalGCTime = executorWrapper.info.totalGCTime, // Keep existing
        totalInputBytes = aggregatedMetrics.inputBytes,
        totalShuffleRead = aggregatedMetrics.shuffleRead,
        totalShuffleWrite = aggregatedMetrics.shuffleWrite,
        isBlacklisted = executorWrapper.info.isBlacklisted,
        maxMemory = executorWrapper.info.maxMemory,
        addTime = adjustedAddTime,
        removeTime = adjustedRemoveTime,
        removeReason = executorWrapper.info.removeReason,
        executorLogs = executorWrapper.info.executorLogs,
        memoryMetrics = executorWrapper.info.memoryMetrics,
        blacklistedInStages = executorWrapper.info.blacklistedInStages,
        peakMemoryMetrics = aggregatedMetrics.peakMemoryMetrics,
        attributes = executorWrapper.info.attributes,
        resources = executorWrapper.info.resources,
        resourceProfileId = executorWrapper.info.resourceProfileId,
        isExcluded = executorWrapper.info.isExcluded,
        excludedInStages = executorWrapper.info.excludedInStages
      )
      // Wrap the updated ExecutorSummary in ExecutorSummaryWrapper
      new ExecutorSummaryWrapper(updatedExecutorSummary)
    }
  }
  /** Reads one object per key, assuming every key exists in the backing store. */
  def readAllByKeys[T: ClassTag](keys: Seq[Object]): Seq[T] = {
    keys.flatMap(key =>
      Some(kvStore.read(classTag[T].runtimeClass, key))
    ).asInstanceOf[Seq[T]]
  }

  /** Reads one object per key, silently skipping keys that are missing from the backing store. */
  def readAllIfExists[T: ClassTag](keys: Seq[Object]): Seq[T] = {
    keys.flatMap(key =>
      try {
        Some(kvStore.read(classTag[T].runtimeClass, key))
      } catch {
        case _: NoSuchElementException => None
      }
    ).asInstanceOf[Seq[T]]
  }
  /** Writes every element of `data` into the target KVStore. */
  def writeAll[T](kvstore: KVStore, data: Seq[T]): Unit = {
    data.foreach(kvstore.write)
  }
  /** Reads every object of type T from the original application's status store. */
  private def readAll[T: ClassTag]: Seq[T] = {
    val view = kvStore.view(classTag[T].runtimeClass)
    val it = view.closeableIterator()
    try {
      it.toSeq.asInstanceOf[Seq[T]]
    } finally {
      it.close()
    }
  }
}
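// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the original source): given an
// application's AppStatusStore and SQLAppStatusStore, a caller could slice out
// the first job group like this. The object name and the parameter names
// `appStore` / `sqlAppStore` are hypothetical; the snippet relies on the
// file-level imports above.
// ---------------------------------------------------------------------------
object JobGroupExtractorUsageSketch {
  def extractFirstGroup(appStore: AppStatusStore, sqlAppStore: SQLAppStatusStore): Unit = {
    val extractor = new JobGroupExtractor(appStore, sqlAppStore)
    // All job-group ids recorded in the status store.
    val groups = extractor.getGroupList()
    groups.headOption.foreach { jobGroupId =>
      // Per-group status stores plus the group's end time in epoch milliseconds.
      val (groupStore, groupSqlStore, groupEndTime) = extractor.extract(jobGroupId)
      println(s"Job group $jobGroupId: ${groupStore.jobsList(null).size} jobs, " +
        s"${groupSqlStore.executionsList().size} SQL executions, ended at $groupEndTime")
    }
  }
}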