All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.runtime.jobgraph.JobVertex Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.jobgraph;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.operators.ResourceSpec;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.io.InputSplitSource;
import org.apache.flink.runtime.io.network.partition.ResultPartitionType;
import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.runtime.jobgraph.tasks.StoppableTask;
import org.apache.flink.runtime.jobmanager.scheduler.CoLocationGroup;
import org.apache.flink.runtime.jobmanager.scheduler.SlotSharingGroup;
import org.apache.flink.util.Preconditions;

import java.util.ArrayList;
import java.util.List;

import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * The base class for job vertexes.
 *
 * <p>A job vertex carries its own ID, the IDs of the operators it contains, the intermediate
 * data sets it produces, the edges over which it receives input, its parallelism and resource
 * settings, and optional scheduling hints (slot sharing group and co-location group).
 *
 * <p>Note that the list getters ({@link #getIdAlternatives()}, {@link #getOperatorIDs()},
 * {@link #getUserDefinedOperatorIDs()}, {@link #getProducedDataSets()} and {@link #getInputs()})
 * hand out the internal lists directly, not copies.
 */
public class JobVertex implements java.io.Serializable {

	private static final long serialVersionUID = 1L;

	/** Name used whenever a {@code null} name is supplied. */
	private static final String DEFAULT_NAME = "(unnamed vertex)";

	// --------------------------------------------------------------------------------------------
	// Members that define the structure / topology of the graph
	// --------------------------------------------------------------------------------------------

	/** The ID of the vertex. */
	private final JobVertexID id;

	/** The alternative IDs of the vertex. */
	private final ArrayList<JobVertexID> idAlternatives = new ArrayList<>();

	/** The IDs of all operators contained in this vertex. */
	private final ArrayList<OperatorID> operatorIDs = new ArrayList<>();

	/** The alternative IDs of all operators contained in this vertex. Entries may be {@code null}. */
	private final ArrayList<OperatorID> operatorIdsAlternatives = new ArrayList<>();

	/** List of produced data sets, one per writer. */
	private final ArrayList<IntermediateDataSet> results = new ArrayList<>();

	/** List of edges with incoming data. One per Reader. */
	private final ArrayList<JobEdge> inputs = new ArrayList<>();

	/** Number of subtasks to split this task into at runtime. */
	private int parallelism = ExecutionConfig.PARALLELISM_DEFAULT;

	/** Maximum number of subtasks to split this task into at runtime. Stays at -1 until explicitly set. */
	private int maxParallelism = -1;

	/** The minimum resource of the vertex. */
	private ResourceSpec minResources = ResourceSpec.DEFAULT;

	/** The preferred resource of the vertex. */
	private ResourceSpec preferredResources = ResourceSpec.DEFAULT;

	/** Custom configuration passed to the assigned task at runtime. Created lazily on first access. */
	private Configuration configuration;

	/** The class name of the invokable. */
	private String invokableClassName;

	/** Indicates if this job vertex is stoppable or not. */
	private boolean isStoppable = false;

	/** Optionally, a source of input splits. */
	private InputSplitSource<?> inputSplitSource;

	/** The name of the vertex. This will be shown in runtime logs and will be in the runtime environment. */
	private String name;

	/** Optionally, a sharing group that allows subtasks from different job vertices to run concurrently in one slot. */
	private SlotSharingGroup slotSharingGroup;

	/** The group inside which the vertex subtasks share slots. */
	private CoLocationGroup coLocationGroup;

	/** Optional, the name of the operator, such as 'Flat Map' or 'Join', to be included in the JSON plan. */
	private String operatorName;

	/** Optional, the description of the operator, like 'Hash Join', or 'Sorted Group Reduce',
	 * to be included in the JSON plan. */
	private String operatorDescription;

	/** Optional, pretty name of the operator, to be displayed in the JSON plan. */
	private String operatorPrettyName;

	/** Optional, the JSON for the optimizer properties of the operator result,
	 * to be included in the JSON plan. */
	private String resultOptimizerProperties;

	// --------------------------------------------------------------------------------------------

	/**
	 * Constructs a new job vertex and assigns it with the given name. A fresh vertex ID is generated.
	 *
	 * @param name The name of the new job vertex; {@code null} selects the default name.
	 */
	public JobVertex(String name) {
		this(name, null);
	}

	/**
	 * Constructs a new job vertex and assigns it with the given name and ID.
	 *
	 * <p>The vertex starts out with a single operator ID derived from the vertex ID and a
	 * matching {@code null} entry in the list of alternative operator IDs.
	 *
	 * @param name The name of the new job vertex; {@code null} selects the default name.
	 * @param id The id of the job vertex; {@code null} causes a new ID to be generated.
	 */
	public JobVertex(String name, JobVertexID id) {
		this.name = name == null ? DEFAULT_NAME : name;
		this.id = id == null ? new JobVertexID() : id;
		// the id lists must have the same size
		this.operatorIDs.add(OperatorID.fromJobVertexID(this.id));
		this.operatorIdsAlternatives.add(null);
	}

	/**
	 * Constructs a new job vertex and assigns it with the given name, IDs and operator IDs.
	 *
	 * @param name The name of the new job vertex; {@code null} selects the default name.
	 * @param primaryId The id of the job vertex; {@code null} causes a new ID to be generated.
	 * @param alternativeIds The alternative ids of the job vertex.
	 * @param operatorIds The ids of all operators contained in this job vertex.
	 * @param alternativeOperatorIds The alternative ids of all operators contained in this job vertex.
	 * @throws IllegalArgumentException Thrown, if {@code operatorIds} and {@code alternativeOperatorIds}
	 *                                  differ in size.
	 */
	public JobVertex(
			String name,
			JobVertexID primaryId,
			List<JobVertexID> alternativeIds,
			List<OperatorID> operatorIds,
			List<OperatorID> alternativeOperatorIds) {

		Preconditions.checkArgument(
			operatorIds.size() == alternativeOperatorIds.size(),
			"The operator ID list and the alternative operator ID list must have the same size.");
		this.name = name == null ? DEFAULT_NAME : name;
		this.id = primaryId == null ? new JobVertexID() : primaryId;
		this.idAlternatives.addAll(alternativeIds);
		this.operatorIDs.addAll(operatorIds);
		this.operatorIdsAlternatives.addAll(alternativeOperatorIds);
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Returns the ID of this job vertex.
	 *
	 * @return The ID of this job vertex
	 */
	public JobVertexID getID() {
		return this.id;
	}

	/**
	 * Returns a list of all alternative IDs of this job vertex.
	 *
	 * @return List of all alternative IDs for this job vertex (the internal list, not a copy)
	 */
	public List<JobVertexID> getIdAlternatives() {
		return idAlternatives;
	}

	/**
	 * Returns the name of the vertex.
	 *
	 * @return The name of the vertex.
	 */
	public String getName() {
		return this.name;
	}

	/**
	 * Sets the name of the vertex.
	 *
	 * @param name The new name; {@code null} selects the default name.
	 */
	public void setName(String name) {
		this.name = name == null ? DEFAULT_NAME : name;
	}

	/**
	 * Returns the number of produced intermediate data sets.
	 *
	 * @return The number of produced intermediate data sets.
	 */
	public int getNumberOfProducedIntermediateDataSets() {
		return this.results.size();
	}

	/**
	 * Returns the number of inputs.
	 *
	 * @return The number of inputs.
	 */
	public int getNumberOfInputs() {
		return this.inputs.size();
	}

	/**
	 * Returns the IDs of all operators contained in this vertex.
	 *
	 * @return The operator IDs (the internal list, not a copy)
	 */
	public List<OperatorID> getOperatorIDs() {
		return operatorIDs;
	}

	/**
	 * Returns the alternative IDs of all operators contained in this vertex.
	 *
	 * @return The alternative operator IDs (the internal list, not a copy); entries may be {@code null}
	 */
	public List<OperatorID> getUserDefinedOperatorIDs() {
		return operatorIdsAlternatives;
	}

	/**
	 * Returns the vertex's configuration object which can be used to pass custom settings to the task at runtime.
	 * The configuration is created on the first call.
	 *
	 * @return the vertex's configuration object
	 */
	public Configuration getConfiguration() {
		if (this.configuration == null) {
			this.configuration = new Configuration();
		}
		return this.configuration;
	}

	/**
	 * Sets the invokable class which represents the task of this vertex. Only the class name is
	 * stored; in addition, the vertex is marked stoppable if the class implements {@link StoppableTask}.
	 *
	 * @param invokable The invokable class, must not be {@code null}.
	 */
	public void setInvokableClass(Class<? extends AbstractInvokable> invokable) {
		checkNotNull(invokable);
		this.invokableClassName = invokable.getName();
		this.isStoppable = StoppableTask.class.isAssignableFrom(invokable);
	}

	/**
	 * Returns the name of the invokable class which represents the task of this vertex.
	 *
	 * @return The name of the invokable class, null if not set.
	 */
	public String getInvokableClassName() {
		return this.invokableClassName;
	}

	/**
	 * Returns the invokable class which represents the task of this vertex. The class is loaded
	 * (and initialized) through the given class loader.
	 *
	 * @param cl The classloader used to resolve user-defined classes
	 * @return The invokable class, null if it is not set
	 * @throws NullPointerException Thrown, if the class loader is {@code null}.
	 * @throws RuntimeException Thrown, if the class cannot be found or is no subclass of
	 *                          {@link AbstractInvokable}.
	 */
	public Class<? extends AbstractInvokable> getInvokableClass(ClassLoader cl) {
		if (cl == null) {
			throw new NullPointerException("The classloader must not be null.");
		}
		if (invokableClassName == null) {
			return null;
		}

		try {
			return Class.forName(invokableClassName, true, cl).asSubclass(AbstractInvokable.class);
		}
		catch (ClassNotFoundException e) {
			throw new RuntimeException("The user-code class could not be resolved.", e);
		}
		catch (ClassCastException e) {
			throw new RuntimeException("The user-code class is no subclass of " + AbstractInvokable.class.getName(), e);
		}
	}

	/**
	 * Gets the parallelism of the task.
	 *
	 * @return The parallelism of the task.
	 */
	public int getParallelism() {
		return parallelism;
	}

	/**
	 * Sets the parallelism for the task.
	 *
	 * @param parallelism The parallelism for the task.
	 * @throws IllegalArgumentException Thrown, if the parallelism is smaller than one.
	 */
	public void setParallelism(int parallelism) {
		if (parallelism < 1) {
			throw new IllegalArgumentException("The parallelism must be at least one.");
		}
		this.parallelism = parallelism;
	}

	/**
	 * Gets the maximum parallelism for the task.
	 *
	 * @return The maximum parallelism for the task, or -1 if it has not been set.
	 */
	public int getMaxParallelism() {
		return maxParallelism;
	}

	/**
	 * Sets the maximum parallelism for the task.
	 *
	 * <p>This method stores the value as given and performs no range check.
	 * NOTE(review): the value is expected to be between 1 and Short.MAX_VALUE; presumably that
	 * range is enforced elsewhere - confirm against the callers.
	 *
	 * @param maxParallelism The maximum parallelism to be set.
	 */
	public void setMaxParallelism(int maxParallelism) {
		this.maxParallelism = maxParallelism;
	}

	/**
	 * Gets the minimum resource for the task.
	 *
	 * @return The minimum resource for the task.
	 */
	public ResourceSpec getMinResources() {
		return minResources;
	}

	/**
	 * Gets the preferred resource for the task.
	 *
	 * @return The preferred resource for the task.
	 */
	public ResourceSpec getPreferredResources() {
		return preferredResources;
	}

	/**
	 * Sets the minimum and preferred resources for the task.
	 *
	 * @param minResources The minimum resource for the task, must not be {@code null}.
	 * @param preferredResources The preferred resource for the task, must not be {@code null}.
	 */
	public void setResources(ResourceSpec minResources, ResourceSpec preferredResources) {
		this.minResources = checkNotNull(minResources);
		this.preferredResources = checkNotNull(preferredResources);
	}

	/**
	 * Gets the source of input splits of this vertex.
	 *
	 * @return The input split source, or {@code null} if none was set.
	 */
	public InputSplitSource<?> getInputSplitSource() {
		return inputSplitSource;
	}

	/**
	 * Sets the source of input splits of this vertex.
	 *
	 * @param inputSplitSource The input split source; may be {@code null}.
	 */
	public void setInputSplitSource(InputSplitSource<?> inputSplitSource) {
		this.inputSplitSource = inputSplitSource;
	}

	/**
	 * Returns the intermediate data sets produced by this vertex.
	 *
	 * @return The produced data sets (the internal list, not a copy)
	 */
	public List<IntermediateDataSet> getProducedDataSets() {
		return this.results;
	}

	/**
	 * Returns the edges over which this vertex receives its input.
	 *
	 * @return The input edges (the internal list, not a copy)
	 */
	public List<JobEdge> getInputs() {
		return this.inputs;
	}

	/**
	 * Associates this vertex with a slot sharing group for scheduling. Different vertices in the same
	 * slot sharing group can run one subtask each in the same slot.
	 *
	 * <p>The vertex is removed from its previous group (if any) before it is added to the new one.
	 *
	 * @param grp The slot sharing group to associate the vertex with; {@code null} only removes the
	 *            vertex from its current group.
	 */
	public void setSlotSharingGroup(SlotSharingGroup grp) {
		if (this.slotSharingGroup != null) {
			this.slotSharingGroup.removeVertexFromGroup(id);
		}

		this.slotSharingGroup = grp;
		if (grp != null) {
			grp.addVertexToGroup(id);
		}
	}

	/**
	 * Gets the slot sharing group that this vertex is associated with. Different vertices in the same
	 * slot sharing group can run one subtask each in the same slot. If the vertex is not associated with
	 * a slot sharing group, this method returns {@code null}.
	 *
	 * @return The slot sharing group to associate the vertex with, or {@code null}, if not associated with one.
	 */
	public SlotSharingGroup getSlotSharingGroup() {
		return slotSharingGroup;
	}

	/**
	 * Tells this vertex to strictly co locate its subtasks with the subtasks of the given vertex.
	 * Strict co-location implies that the n'th subtask of this vertex will run on the same parallel computing
	 * instance (TaskManager) as the n'th subtask of the given vertex.
	 *
	 * <p>NOTE: Co-location is only possible between vertices in a slot sharing group.
	 *
	 * <p>NOTE: This vertex must (transitively) depend on the vertex to be co-located with. That means that the
	 * respective vertex must be a (transitive) input of this vertex.
	 *
	 * @param strictlyCoLocatedWith The vertex whose subtasks to co-locate this vertex's subtasks with.
	 *
	 * @throws IllegalArgumentException Thrown, if this vertex and the vertex to co-locate with are not in a common
	 *                                  slot sharing group.
	 *
	 * @see #setSlotSharingGroup(SlotSharingGroup)
	 */
	public void setStrictlyCoLocatedWith(JobVertex strictlyCoLocatedWith) {
		if (this.slotSharingGroup == null || this.slotSharingGroup != strictlyCoLocatedWith.slotSharingGroup) {
			throw new IllegalArgumentException("Strict co-location requires that both vertices are in the same slot sharing group.");
		}

		CoLocationGroup thisGroup = this.coLocationGroup;
		CoLocationGroup otherGroup = strictlyCoLocatedWith.coLocationGroup;

		if (otherGroup == null) {
			if (thisGroup == null) {
				// neither vertex is grouped yet: start a new group holding both
				CoLocationGroup group = new CoLocationGroup(this, strictlyCoLocatedWith);
				this.coLocationGroup = group;
				strictlyCoLocatedWith.coLocationGroup = group;
			}
			else {
				// only this vertex is grouped: pull the other vertex into this group
				thisGroup.addVertex(strictlyCoLocatedWith);
				strictlyCoLocatedWith.coLocationGroup = thisGroup;
			}
		}
		else {
			if (thisGroup == null) {
				// only the other vertex is grouped: join its group
				otherGroup.addVertex(this);
				this.coLocationGroup = otherGroup;
			}
			else if (thisGroup != otherGroup) {
				// both had yet distinct groups, we need to merge them
				thisGroup.mergeInto(otherGroup);
			}
			// else: both vertices already share one group, nothing to do.
			// NOTE(review): merging a group into itself is skipped on purpose; the effect of
			// CoLocationGroup.mergeInto on identical groups is not visible here - confirm.
		}
	}

	/**
	 * Gets the co-location group of this vertex.
	 *
	 * @return The co-location group, or {@code null} if the vertex is in none.
	 */
	public CoLocationGroup getCoLocationGroup() {
		return coLocationGroup;
	}

	/**
	 * Replaces the co-location group reference held by this vertex. Only the reference is
	 * changed; the membership of the groups themselves is not touched by this method.
	 *
	 * @param group The new co-location group.
	 */
	public void updateCoLocationGroup(CoLocationGroup group) {
		this.coLocationGroup = group;
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Creates a new intermediate data set with a freshly generated ID and registers it as a
	 * result of this vertex.
	 *
	 * @param partitionType The partition type of the new data set.
	 * @return The newly created data set.
	 */
	public IntermediateDataSet createAndAddResultDataSet(ResultPartitionType partitionType) {
		return createAndAddResultDataSet(new IntermediateDataSetID(), partitionType);
	}

	/**
	 * Creates a new intermediate data set with the given ID and registers it as a result of this vertex.
	 *
	 * @param id The ID of the new data set.
	 * @param partitionType The partition type of the new data set.
	 * @return The newly created data set.
	 */
	public IntermediateDataSet createAndAddResultDataSet(
			IntermediateDataSetID id,
			ResultPartitionType partitionType) {

		IntermediateDataSet result = new IntermediateDataSet(id, partitionType, this);
		this.results.add(result);
		return result;
	}

	/**
	 * Connects the given data set as an input of this vertex. The created edge is added to the
	 * inputs of this vertex and registered as a consumer of the data set.
	 *
	 * @param dataSet The data set to consume.
	 * @param distPattern The distribution pattern of the connection.
	 * @return The edge connecting the data set with this vertex.
	 */
	public JobEdge connectDataSetAsInput(IntermediateDataSet dataSet, DistributionPattern distPattern) {
		return connectInput(dataSet, distPattern);
	}

	/**
	 * Creates a new result data set on the given vertex and connects it as an input of this vertex.
	 *
	 * @param input The vertex producing the new data set.
	 * @param distPattern The distribution pattern of the connection.
	 * @param partitionType The partition type of the new data set.
	 * @return The edge connecting the new data set with this vertex.
	 */
	public JobEdge connectNewDataSetAsInput(
			JobVertex input,
			DistributionPattern distPattern,
			ResultPartitionType partitionType) {

		IntermediateDataSet dataSet = input.createAndAddResultDataSet(partitionType);
		return connectInput(dataSet, distPattern);
	}

	/**
	 * Adds an input edge that references its source data set only by ID. No consumer is
	 * registered on any data set by this method.
	 *
	 * @param dataSetId The ID of the data set to consume.
	 * @param distPattern The distribution pattern of the connection.
	 */
	public void connectIdInput(IntermediateDataSetID dataSetId, DistributionPattern distPattern) {
		JobEdge edge = new JobEdge(dataSetId, this, distPattern);
		this.inputs.add(edge);
	}

	/** Creates the edge from the data set to this vertex and registers it on both ends. */
	private JobEdge connectInput(IntermediateDataSet dataSet, DistributionPattern distPattern) {
		JobEdge edge = new JobEdge(dataSet, this, distPattern);
		this.inputs.add(edge);
		dataSet.addConsumer(edge);
		return edge;
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Checks whether this vertex has no input edges.
	 *
	 * @return {@code true} if the vertex has no inputs, {@code false} otherwise.
	 */
	public boolean isInputVertex() {
		return this.inputs.isEmpty();
	}

	/**
	 * Checks whether the invokable class set on this vertex implements {@link StoppableTask}.
	 *
	 * @return {@code true} if the vertex is stoppable, {@code false} otherwise (also if no
	 *         invokable class was set).
	 */
	public boolean isStoppable() {
		return this.isStoppable;
	}

	/**
	 * Checks whether this vertex produces no intermediate data sets.
	 *
	 * @return {@code true} if the vertex has no results, {@code false} otherwise.
	 */
	public boolean isOutputVertex() {
		return this.results.isEmpty();
	}

	/**
	 * Checks whether all input edges of this vertex are mere ID references.
	 *
	 * @return {@code true} if the vertex has no inputs or every input edge is an ID reference,
	 *         {@code false} as soon as one edge is not an ID reference.
	 */
	public boolean hasNoConnectedInputs() {
		for (JobEdge edge : inputs) {
			if (!edge.isIdReference()) {
				return false;
			}
		}

		return true;
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * A hook that can be overwritten by sub classes to implement logic that is called by the
	 * master when the job starts. The default implementation does nothing.
	 *
	 * @param loader The class loader for user defined code.
	 * @throws Exception The method may throw exceptions which cause the job to fail immediately.
	 */
	public void initializeOnMaster(ClassLoader loader) throws Exception {}

	/**
	 * A hook that can be overwritten by sub classes to implement logic that is called by the
	 * master after the job completed. The default implementation does nothing.
	 *
	 * @param loader The class loader for user defined code.
	 * @throws Exception The method may throw exceptions which cause the job to fail immediately.
	 */
	public void finalizeOnMaster(ClassLoader loader) throws Exception {}

	// --------------------------------------------------------------------------------------------

	/**
	 * Gets the operator name used in the JSON plan.
	 *
	 * @return The operator name, or {@code null} if not set.
	 */
	public String getOperatorName() {
		return operatorName;
	}

	/**
	 * Sets the operator name used in the JSON plan.
	 *
	 * @param operatorName The operator name.
	 */
	public void setOperatorName(String operatorName) {
		this.operatorName = operatorName;
	}

	/**
	 * Gets the operator description used in the JSON plan.
	 *
	 * @return The operator description, or {@code null} if not set.
	 */
	public String getOperatorDescription() {
		return operatorDescription;
	}

	/**
	 * Sets the operator description used in the JSON plan.
	 *
	 * @param operatorDescription The operator description.
	 */
	public void setOperatorDescription(String operatorDescription) {
		this.operatorDescription = operatorDescription;
	}

	/**
	 * Sets the pretty name of the operator displayed in the JSON plan.
	 *
	 * @param operatorPrettyName The pretty name of the operator.
	 */
	public void setOperatorPrettyName(String operatorPrettyName) {
		this.operatorPrettyName = operatorPrettyName;
	}

	/**
	 * Gets the pretty name of the operator displayed in the JSON plan.
	 *
	 * @return The pretty name of the operator, or {@code null} if not set.
	 */
	public String getOperatorPrettyName() {
		return operatorPrettyName;
	}

	/**
	 * Gets the JSON for the optimizer properties of the operator result.
	 *
	 * @return The optimizer properties JSON, or {@code null} if not set.
	 */
	public String getResultOptimizerProperties() {
		return resultOptimizerProperties;
	}

	/**
	 * Sets the JSON for the optimizer properties of the operator result.
	 *
	 * @param resultOptimizerProperties The optimizer properties JSON.
	 */
	public void setResultOptimizerProperties(String resultOptimizerProperties) {
		this.resultOptimizerProperties = resultOptimizerProperties;
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Returns the vertex name followed by the invokable class name in parentheses.
	 */
	@Override
	public String toString() {
		return this.name + " (" + this.invokableClassName + ')';
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy