All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.webmonitor.WebMonitorUtils Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.webmonitor;

import org.apache.flink.api.common.time.Time;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.WebOptions;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.concurrent.ScheduledExecutor;
import org.apache.flink.runtime.dispatcher.DispatcherGateway;
import org.apache.flink.runtime.execution.ExecutionState;
import org.apache.flink.runtime.executiongraph.AccessExecutionGraph;
import org.apache.flink.runtime.executiongraph.AccessExecutionJobVertex;
import org.apache.flink.runtime.executiongraph.AccessExecutionVertex;
import org.apache.flink.runtime.highavailability.HighAvailabilityServices;
import org.apache.flink.runtime.jobgraph.JobStatus;
import org.apache.flink.runtime.jobmaster.JobManagerGateway;
import org.apache.flink.runtime.leaderretrieval.LeaderRetrievalService;
import org.apache.flink.runtime.messages.webmonitor.JobDetails;
import org.apache.flink.runtime.rest.handler.legacy.files.StaticFileServerHandler;
import org.apache.flink.runtime.webmonitor.history.JsonArchivist;
import org.apache.flink.runtime.webmonitor.retriever.GatewayRetriever;
import org.apache.flink.runtime.webmonitor.retriever.LeaderGatewayRetriever;
import org.apache.flink.runtime.webmonitor.retriever.MetricQueryServiceRetriever;
import org.apache.flink.util.FlinkException;

import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URI;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;

/**
 * Utilities for the web runtime monitor. This class contains for example methods to build
 * messages with aggregate information about the state of an execution graph, to be send
 * to the web server.
 */
public final class WebMonitorUtils {

	private static final String WEB_RUNTIME_MONITOR_CLASS_FQN = "org.apache.flink.runtime.webmonitor.WebRuntimeMonitor";

	private static final Logger LOG = LoggerFactory.getLogger(WebMonitorUtils.class);

	/**
	 * Singleton to hold the log and stdout file.
	 */
	public static class LogFileLocation {

		public final File logFile;
		public final File stdOutFile;

		private LogFileLocation(File logFile, File stdOutFile) {
			this.logFile = logFile;
			this.stdOutFile = stdOutFile;
		}

		/**
		 * Finds the Flink log directory using log.file Java property that is set during startup.
		 */
		public static LogFileLocation find(Configuration config) {
			final String logEnv = "log.file";
			String logFilePath = System.getProperty(logEnv);

			if (logFilePath == null) {
				LOG.warn("Log file environment variable '{}' is not set.", logEnv);
				logFilePath = config.getString(WebOptions.LOG_PATH);
			}

			// not configured, cannot serve log files
			if (logFilePath == null || logFilePath.length() < 4) {
				LOG.warn("JobManager log files are unavailable in the web dashboard. " +
					"Log file location not found in environment variable '{}' or configuration key '{}'.",
					logEnv, WebOptions.LOG_PATH);
				return new LogFileLocation(null, null);
			}

			String outFilePath = logFilePath.substring(0, logFilePath.length() - 3).concat("out");

			LOG.info("Determined location of main cluster component log file: {}", logFilePath);
			LOG.info("Determined location of main cluster component stdout file: {}", outFilePath);

			return new LogFileLocation(resolveFileLocation(logFilePath), resolveFileLocation(outFilePath));
		}

		/**
		 * Verify log file location.
		 *
		 * @param logFilePath Path to log file
		 * @return File or null if not a valid log file
		 */
		private static File resolveFileLocation(String logFilePath) {
			File logFile = new File(logFilePath);
			return (logFile.exists() && logFile.canRead()) ? logFile : null;
		}
	}

	/**
	 * Starts the web runtime monitor. Because the actual implementation of the runtime monitor is
	 * in another project, we load the runtime monitor dynamically.
	 *
	 * 

Because failure to start the web runtime monitor is not considered fatal, this method does * not throw any exceptions, but only logs them. * * @param config The configuration for the runtime monitor. * @param highAvailabilityServices HighAvailabilityServices used to start the WebRuntimeMonitor * @param jobManagerRetriever which retrieves the currently leading JobManager * @param queryServiceRetriever which retrieves the query service * @param timeout for asynchronous operations * @param scheduledExecutor to run asynchronous operations */ public static WebMonitor startWebRuntimeMonitor( Configuration config, HighAvailabilityServices highAvailabilityServices, LeaderGatewayRetriever jobManagerRetriever, MetricQueryServiceRetriever queryServiceRetriever, Time timeout, ScheduledExecutor scheduledExecutor) { // try to load and instantiate the class try { Class clazz = Class.forName(WEB_RUNTIME_MONITOR_CLASS_FQN).asSubclass(WebMonitor.class); Constructor constructor = clazz.getConstructor( Configuration.class, LeaderRetrievalService.class, LeaderGatewayRetriever.class, MetricQueryServiceRetriever.class, Time.class, ScheduledExecutor.class); return constructor.newInstance( config, highAvailabilityServices.getJobManagerLeaderRetriever(HighAvailabilityServices.DEFAULT_JOB_ID), jobManagerRetriever, queryServiceRetriever, timeout, scheduledExecutor); } catch (ClassNotFoundException e) { LOG.error("Could not load web runtime monitor. " + "Probably reason: flink-runtime-web is not in the classpath"); LOG.debug("Caught exception", e); return null; } catch (InvocationTargetException e) { LOG.error("WebServer could not be created", e.getTargetException()); return null; } catch (Throwable t) { LOG.error("Failed to instantiate web runtime monitor.", t); return null; } } /** * Checks whether the flink-runtime-web dependency is available and if so returns a * StaticFileServerHandler which can serve the static file contents. * * @param leaderRetriever to be used by the StaticFileServerHandler * @param restAddressFuture of the underlying REST server endpoint * @param timeout for lookup requests * @param tmpDir to be used by the StaticFileServerHandler to store temporary files * @param type of the gateway to retrieve * @return StaticFileServerHandler if flink-runtime-web is in the classpath; Otherwise Optional.empty * @throws IOException if we cannot create the StaticFileServerHandler */ public static Optional> tryLoadWebContent( GatewayRetriever leaderRetriever, CompletableFuture restAddressFuture, Time timeout, File tmpDir) throws IOException { if (isFlinkRuntimeWebInClassPath()) { return Optional.of(new StaticFileServerHandler<>( leaderRetriever, restAddressFuture, timeout, tmpDir)); } else { return Optional.empty(); } } /** * Loads the {@link WebMonitorExtension} which enables web submission. * * @param leaderRetriever to retrieve the leader * @param restAddressFuture of the underlying REST server endpoint * @param timeout for asynchronous requests * @param responseHeaders for the web submission handlers * @param uploadDir where the web submission handler store uploaded jars * @param executor to run asynchronous operations * @param configuration used to instantiate the web submission extension * @return Web submission extension * @throws FlinkException if the web submission extension could not be loaded */ public static WebMonitorExtension loadWebSubmissionExtension( GatewayRetriever leaderRetriever, CompletableFuture restAddressFuture, Time timeout, Map responseHeaders, java.nio.file.Path uploadDir, Executor executor, Configuration configuration) throws FlinkException { if (isFlinkRuntimeWebInClassPath()) { try { final Constructor webSubmissionExtensionConstructor = Class .forName("org.apache.flink.runtime.webmonitor.WebSubmissionExtension") .getConstructor( Configuration.class, CompletableFuture.class, GatewayRetriever.class, Map.class, java.nio.file.Path.class, Executor.class, Time.class); return (WebMonitorExtension) webSubmissionExtensionConstructor.newInstance( configuration, restAddressFuture, leaderRetriever, responseHeaders, uploadDir, executor, timeout); } catch (ClassNotFoundException | NoSuchMethodException | InstantiationException | InvocationTargetException | IllegalAccessException e) { throw new FlinkException("Could not load web submission extension.", e); } } else { throw new FlinkException("The module flink-runtime-web could not be found in the class path. Please add " + "this jar in order to enable web based job submission."); } } public static JsonArchivist[] getJsonArchivists() { try { Class clazz = Class.forName(WEB_RUNTIME_MONITOR_CLASS_FQN).asSubclass(WebMonitor.class); Method method = clazz.getMethod("getJsonArchivists"); return (JsonArchivist[]) method.invoke(null); } catch (ClassNotFoundException e) { LOG.error("Could not load web runtime monitor. " + "Probably reason: flink-runtime-web is not in the classpath"); LOG.debug("Caught exception", e); return new JsonArchivist[0]; } catch (Throwable t) { LOG.error("Failed to retrieve archivers from web runtime monitor.", t); return new JsonArchivist[0]; } } public static Map fromKeyValueJsonArray(String jsonString) { try { Map map = new HashMap<>(); ObjectMapper m = new ObjectMapper(); ArrayNode array = (ArrayNode) m.readTree(jsonString); Iterator elements = array.elements(); while (elements.hasNext()) { JsonNode node = elements.next(); String key = node.get("key").asText(); String value = node.get("value").asText(); map.put(key, value); } return map; } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } } public static JobDetails createDetailsForJob(AccessExecutionGraph job) { JobStatus status = job.getState(); long started = job.getStatusTimestamp(JobStatus.CREATED); long finished = status.isGloballyTerminalState() ? job.getStatusTimestamp(status) : -1L; long duration = (finished >= 0L ? finished : System.currentTimeMillis()) - started; int[] countsPerStatus = new int[ExecutionState.values().length]; long lastChanged = 0; int numTotalTasks = 0; for (AccessExecutionJobVertex ejv : job.getVerticesTopologically()) { AccessExecutionVertex[] vertices = ejv.getTaskVertices(); numTotalTasks += vertices.length; for (AccessExecutionVertex vertex : vertices) { ExecutionState state = vertex.getExecutionState(); countsPerStatus[state.ordinal()]++; lastChanged = Math.max(lastChanged, vertex.getStateTimestamp(state)); } } lastChanged = Math.max(lastChanged, finished); return new JobDetails( job.getJobID(), job.getJobName(), started, finished, duration, status, lastChanged, countsPerStatus, numTotalTasks); } /** * Checks and normalizes the given URI. This method first checks the validity of the * URI (scheme and path are not null) and then normalizes the URI to a path. * * @param archiveDirUri The URI to check and normalize. * @return A normalized URI as a Path. * * @throws IllegalArgumentException Thrown, if the URI misses scheme or path. */ public static Path validateAndNormalizeUri(URI archiveDirUri) { final String scheme = archiveDirUri.getScheme(); final String path = archiveDirUri.getPath(); // some validity checks if (scheme == null) { throw new IllegalArgumentException("The scheme (hdfs://, file://, etc) is null. " + "Please specify the file system scheme explicitly in the URI."); } if (path == null) { throw new IllegalArgumentException("The path to store the job archive data in is null. " + "Please specify a directory path for the archiving the job data."); } return new Path(archiveDirUri); } /** * Private constructor to prevent instantiation. */ private WebMonitorUtils() { throw new RuntimeException(); } /** * Returns {@code true} if the optional dependency {@code flink-runtime-web} is in the * classpath. */ private static boolean isFlinkRuntimeWebInClassPath() { try { Class.forName(WEB_RUNTIME_MONITOR_CLASS_FQN).asSubclass(WebMonitor.class); return true; } catch (ClassNotFoundException e) { // class not found means that there is no flink-runtime-web in the classpath return false; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy