Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.jobmanager
import java.io.IOException
import java.net.URI
import java.util
import akka.actor.ActorRef
import grizzled.slf4j.Logger
import org.apache.flink.api.common.JobID
import org.apache.flink.core.fs.{FileSystem, Path}
import org.apache.flink.runtime.jobgraph.JobStatus
import org.apache.flink.runtime.messages.accumulators._
import org.apache.flink.runtime.webmonitor.WebMonitorUtils
import org.apache.flink.runtime.{FlinkActor, LogMessages}
import org.apache.flink.runtime.messages.webmonitor._
import org.apache.flink.runtime.executiongraph.{ArchivedExecutionGraph, ExecutionGraph}
import org.apache.flink.runtime.history.FsJobArchivist
import org.apache.flink.runtime.messages.ArchiveMessages._
import org.apache.flink.runtime.messages.JobManagerMessages._
import org.apache.flink.runtime.messages.webmonitor.JobIdsWithStatusOverview.JobIdWithStatus
import scala.collection.mutable
import scala.concurrent.future
/**
* Actor which stores terminated Flink jobs. The number of stored Flink jobs is set by max_entries.
* If this number is exceeded, the oldest job will be discarded. One can interact with the actor by
* the following messages:
*
* - [[ArchiveExecutionGraph]] archives the attached [[ExecutionGraph]]
*
* - [[RequestArchivedJobs]] returns all currently stored [[ExecutionGraph]]s to the sender
* encapsulated in a [[ArchivedJobs]] message.
*
* - [[RequestJob]] returns the corresponding [[org.apache.flink.runtime.jobgraph.JobGraph]]
* encapsulated in [[JobFound]] if the job is stored by the MemoryArchivist. If not, then
* [[JobNotFound]] is returned.
*
* - [[RequestJobStatus]] returns the last state of the corresponding job. If the job can be found,
* then a [[CurrentJobStatus]] message with the last state is returned to the sender, otherwise
* a [[JobNotFound]] message is returned
*
* - [[RequestJobCounts]] returns the number of finished, canceled, and failed jobs as a Tuple3
*
* @param maxEntries Maximum number of stored Flink jobs
* @param archivePath Optional path of the job archive directory
*/
class MemoryArchivist(
private val maxEntries: Int,
private val archivePath: Option[Path])
extends FlinkActor
with LogMessages {
override val log = Logger(getClass)
/*
* Map of execution graphs belonging to recently started jobs with the time stamp of the last
* received job event. The insert order is preserved through a LinkedHashMap.
*/
protected val graphs = mutable.LinkedHashMap[JobID, ArchivedExecutionGraph]()
/* Counters for finished, canceled, and failed jobs */
private[this] var finishedCnt: Int = 0
private[this] var canceledCnt: Int = 0
private[this] var failedCnt: Int = 0
override def preStart(): Unit = {
log.info(s"Started memory archivist ${self.path}")
}
override def handleMessage: Receive = {
/* Receive Execution Graph to archive */
case ArchiveExecutionGraph(jobID, graph) =>
// Keep lru order in case we override a graph (from multiple job submission in one session).
// This deletes old ExecutionGraph with this JobID from the history but avoids to store
// redundant ExecutionGraphs.
// TODO Allow ExecutionGraphs with the same jobID to be stored and displayed in web interface
graphs.remove(jobID)
graphs.put(jobID, graph)
// update job counters
graph.getState match {
case JobStatus.FINISHED => finishedCnt += 1
case JobStatus.CANCELED => canceledCnt += 1
case JobStatus.FAILED => failedCnt += 1
// ignore transitional states, e.g. Cancelling, Running, Failing, etc.
case _ =>
}
archiveJsonFiles(graph)
trimHistory()
case msg : InfoMessage => handleWebServerInfoMessage(msg, sender())
case RequestArchivedJob(jobID: JobID) =>
val graph = graphs.get(jobID)
sender ! decorateMessage(ArchivedJob(graph))
case RequestArchivedJobs =>
sender ! decorateMessage(ArchivedJobs(graphs.values))
case RequestJob(jobID) =>
graphs.get(jobID) match {
case Some(graph) => sender ! decorateMessage(JobFound(jobID, graph))
case None => sender ! decorateMessage(JobNotFound(jobID))
}
case RequestJobStatus(jobID) =>
graphs.get(jobID) match {
case Some(graph) => sender ! decorateMessage(CurrentJobStatus(jobID, graph.getState))
case None => sender ! decorateMessage(JobNotFound(jobID))
}
case RequestJobCounts =>
sender ! decorateMessage((finishedCnt, canceledCnt, failedCnt))
case RequestAccumulatorResults(jobID) =>
try {
graphs.get(jobID) match {
case Some(graph) =>
val accumulatorValues = graph.getAccumulatorsSerialized()
sender() ! decorateMessage(AccumulatorResultsFound(jobID, accumulatorValues))
case None =>
sender() ! decorateMessage(AccumulatorResultsNotFound(jobID))
}
} catch {
case e: Exception =>
log.error("Cannot serialize accumulator result.", e)
sender() ! decorateMessage(AccumulatorResultsErroneous(jobID, e))
}
case RequestAccumulatorResultsStringified(jobID) =>
graphs.get(jobID) match {
case Some(graph) =>
val accumulatorValues = graph.getAccumulatorResultsStringified()
sender() ! decorateMessage(AccumulatorResultStringsFound(jobID, accumulatorValues))
case None =>
sender() ! decorateMessage(AccumulatorResultsNotFound(jobID))
}
}
/**
* Handle unmatched messages with an exception.
*/
override def unhandled(message: Any): Unit = {
// let the actor crash
throw new RuntimeException("Received unknown message " + message)
}
private def handleWebServerInfoMessage(message: InfoMessage, theSender: ActorRef): Unit = {
message match {
case _ : RequestJobsOverview =>
try {
sender ! decorateMessage(createJobsOverview())
}
catch {
case t: Throwable => log.error("Exception while creating the jobs overview", t)
}
case _ : RequestJobsWithIDsOverview =>
try {
sender ! decorateMessage(createJobsWithIDsOverview())
}
catch {
case t: Throwable => log.error("Exception while creating the jobs overview", t)
}
case _ : RequestJobDetails =>
val details = graphs.values.map {
v => WebMonitorUtils.createDetailsForJob(v)
}.toArray[JobDetails]
theSender ! decorateMessage(new MultipleJobsDetails(util.Arrays.asList(details: _*)))
}
}
private def archiveJsonFiles(graph: ArchivedExecutionGraph) {
// a suspended job is expected to continue on another job manager,
// so we aren't archiving it yet.
if (archivePath.isDefined && graph.getState.isGloballyTerminalState) {
try {
val p = validateAndNormalizeUri(archivePath.get.toUri)
future {
try {
FsJobArchivist.archiveJob(p, graph)
} catch {
case e: Exception =>
log.error("Failed to archive job.", e)
}
}(context.dispatcher)
} catch {
case e: Exception =>
log.warn(s"Failed to create Path for $archivePath. Job will not be archived.", e)
}
}
}
// --------------------------------------------------------------------------
// Request Responses
// --------------------------------------------------------------------------
private def createJobsOverview() : JobsOverview = {
new JobsOverview(0, finishedCnt, canceledCnt, failedCnt)
}
private def createJobsWithIDsOverview() : JobIdsWithStatusOverview = {
val jobIdsWithStatuses =
new java.util.ArrayList[JobIdWithStatus](graphs.size)
graphs.values.foreach { graph =>
jobIdsWithStatuses.add(
new JobIdWithStatus(graph.getJobID, graph.getState))
}
new JobIdsWithStatusOverview(jobIdsWithStatuses)
}
// --------------------------------------------------------------------------
// Utilities
// --------------------------------------------------------------------------
/**
* Remove old ExecutionGraphs belonging to a jobID
* * if more than max_entries are in the queue.
*/
private def trimHistory(): Unit = {
while (graphs.size > maxEntries) {
// get first graph inserted
val (jobID, value) = graphs.head
graphs.remove(jobID)
}
}
/**
* Checks and normalizes the archive path URI. This method first checks the validity of the
* URI (scheme, path, availability of a matching file system) and then normalizes the URL
* to a path.
*
* If the URI does not include an authority, but the file system configured for the URI has an
* authority, then the normalized path will include this authority.
*
* @param archivePathUri The URI to check and normalize.
* @return a normalized URI as a Path.
*
* @throws IllegalArgumentException Thrown, if the URI misses schema or path.
* @throws IOException Thrown, if no file system can be found for the URI's scheme.
*/
@throws[IOException]
private def validateAndNormalizeUri(archivePathUri: URI): Path = {
val scheme = archivePathUri.getScheme
val path = archivePathUri.getPath
// some validity checks
if (scheme == null) {
throw new IllegalArgumentException("The scheme (hdfs://, file://, etc) is null. " +
"Please specify the file system scheme explicitly in the URI: " + archivePathUri)
}
if (path == null) {
throw new IllegalArgumentException("The path to store the job archives is null. " +
"Please specify a directory path for storing job archives. and the URI is: " +
archivePathUri)
}
if (path.length == 0 || path == "/") {
throw new IllegalArgumentException("Cannot use the root directory for storing job archives.")
}
try {
FileSystem.get(archivePathUri)
}
catch {
case e: Exception =>
throw new IllegalArgumentException(s"No file system found for URI '${archivePathUri}'.")
}
new Path(archivePathUri)
}
}