All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tez.dag.api.VertexManagerPluginContext Maven / Gradle / Ivy

There is a newer version: 0.10.4
Show newest version
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.tez.dag.api;

import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;

import javax.annotation.Nullable;

import org.apache.hadoop.classification.InterfaceAudience.Public;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
import org.apache.hadoop.yarn.api.records.Resource;
import org.apache.tez.dag.api.event.VertexState;
import org.apache.tez.runtime.api.InputSpecUpdate;
import org.apache.tez.runtime.api.VertexStatistics;
import org.apache.tez.runtime.api.events.CustomProcessorEvent;
import org.apache.tez.runtime.api.events.InputDataInformationEvent;

import org.apache.tez.common.Preconditions;

/**
 * Object with API's to interact with the Tez execution engine
 */
@Unstable
@Public
public interface VertexManagerPluginContext {
  
  public class ScheduleTaskRequest {
    int taskIndex;
    TaskLocationHint locationHint;

    public static ScheduleTaskRequest create(int taskIndex, @Nullable TaskLocationHint locationHint) {
      return new ScheduleTaskRequest(taskIndex, locationHint); 
    }
    
    private ScheduleTaskRequest(int taskIndex, @Nullable TaskLocationHint locationHint) {
      Preconditions.checkState(taskIndex >= 0);
      this.taskIndex = taskIndex;
      this.locationHint = locationHint;
    }
    
    public int getTaskIndex() {
      return taskIndex;
    }
    
    public TaskLocationHint getTaskLocationHint() {
      return locationHint;
    }    
  }
  
  @Deprecated
  public class TaskWithLocationHint {
    Integer taskIndex;
    TaskLocationHint locationHint;
    public TaskWithLocationHint(Integer taskIndex, @Nullable TaskLocationHint locationHint) {
      this.taskIndex = Objects.requireNonNull(taskIndex);
      this.locationHint = locationHint;
    }
    
    public Integer getTaskIndex() {
      return taskIndex;
    }
    
    public TaskLocationHint getTaskLocationHint() {
      return locationHint;
    }
  }

  /**
   * Get the edge properties on the input edges of this vertex. The input edge 
   * is represented by the source vertex name
   * @return Map of source vertex name and edge property
   */
  public Map getInputVertexEdgeProperties();
  
  /**
   * Get the edge properties on the output edges of this vertex. The output edge 
   * is represented by the destination vertex name
   * @return Map of destination vertex name and edge property
   */
  public Map getOutputVertexEdgeProperties();
  
  /**
   * Get a {@link VertexStatistics} object to find out execution statistics
   * about the given {@link Vertex}.
   * 
This only provides point in time values for statistics (completed tasks) * and must be called again to get updated values. * * @param vertexName * Name of the {@link Vertex} * @return {@link VertexStatistics} for the given vertex */ public VertexStatistics getVertexStatistics(String vertexName); /** * Get the name of the vertex * @return Vertex name */ public String getVertexName(); /** * Get the payload set for the plugin * @return user payload */ public UserPayload getUserPayload(); /** * Get the number of tasks in the given vertex * @param vertexName * @return Total number of tasks in this vertex */ public int getVertexNumTasks(String vertexName); /** * Get the resource allocated to a task of this vertex * @return Resource */ Resource getVertexTaskResource(); /** * Get the total resource allocated to this vertex. If the DAG is running in * a busy cluster then it may have no resources available dedicated to it. The * DAG may divide its available resource among member vertices. * @return Resource */ Resource getTotalAvailableResource(); /** * Get the number of nodes in the cluster * @return Number of nodes */ int getNumClusterNodes(); /** * Set the new parallelism (number of tasks) of this vertex, * Map of source (input) vertices and edge managers to change the event routing * between the source tasks and the new destination tasks and the number of physical inputs for root inputs. * This API can change the parallelism only once. Subsequent attempts will be * disallowed * @param parallelism New number of tasks in the vertex * @param locationHint the placement policy for tasks. * @param sourceEdgeManagers Edge Managers to be updated * @param rootInputSpecUpdate Updated Root Input specifications, if any. * If none specified, a default of 1 physical input is used */ @Deprecated public void setVertexParallelism(int parallelism, @Nullable VertexLocationHint locationHint, @Nullable Map sourceEdgeManagers, @Nullable Map rootInputSpecUpdate); /** * API to reconfigure a {@link Vertex} that is reading root inputs based on * the data read from the root inputs. Root inputs are external data sources * that provide the initial data for the DAG and are added to the * {@link Vertex} using the * {@link Vertex#addDataSource(String, DataSourceDescriptor)} API. Typically, * the parallelism of such vertices is determined at runtime by gathering * information about the data source. This API may be used to set the * parallelism of the vertex at runtime based on the data sources, as well as * changing the specification for those inputs. In addition, changing * parallelism is often accompanied by changing the {@link EdgeProperty} of * the source {@link Edge} because event routing between source and * destination tasks may need to be updated to account for the new task * parallelism. This method can be called to update the parallelism multiple * times until any of the tasks of the vertex have been scheduled (by invoking * {@link #scheduleTasks(List)}. If needed, the original source edge * properties may be obtained via {@link #getInputVertexEdgeProperties()} * * @param parallelism * New number of tasks in the vertex * @param locationHint * the placement policy for tasks specified at * {@link VertexLocationHint}s * @param sourceEdgeProperties * Map with Key=name of {@link Edge} to be updated and Value= * {@link EdgeProperty}. The name of the Edge will be the * corresponding source vertex name. * @param rootInputSpecUpdate * The key of the map is the name of the data source and the value is * the updated {@link InputSpecUpdate} for that data source. If none * specified, a default value is used. See {@link InputSpecUpdate} * for details. */ public void reconfigureVertex(int parallelism, @Nullable VertexLocationHint locationHint, @Nullable Map sourceEdgeProperties, @Nullable Map rootInputSpecUpdate); /** * API to reconfigure a {@link Vertex} by changing its task parallelism. Task * parallelism is often accompanied by changing the {@link EdgeProperty} of * the source {@link Edge} because event routing between source and * destination tasks may need to be updated to account for the new task * parallelism. This method can be called to update the parallelism multiple * times until any of the tasks of the vertex have been scheduled (by invoking * {@link #scheduleTasks(List)}. If needed, the original source edge * properties may be obtained via {@link #getInputVertexEdgeProperties()} * * @param parallelism * New number of tasks in the vertex * @param locationHint * the placement policy for tasks specified at * {@link VertexLocationHint}s * @param sourceEdgeProperties * Map with Key=name of {@link Edge} to be updated and Value= * {@link EdgeProperty}. The name of the Edge will be the * corresponding source vertex name. */ public void reconfigureVertex(int parallelism, @Nullable VertexLocationHint locationHint, @Nullable Map sourceEdgeProperties); /** * API to reconfigure a {@link Vertex} that is reading root inputs based on * the data read from the root inputs. Root inputs are external data sources * that provide the initial data for the DAG and are added to the * {@link Vertex} using the * {@link Vertex#addDataSource(String, DataSourceDescriptor)} API. Typically, * the parallelism of such vertices is determined at runtime by gathering * information about the data source. This API may be used to set the * parallelism of the vertex at runtime based on the data sources, as well as * changing the specification for those inputs. * @param rootInputSpecUpdate * The key of the map is the name of the data source and the value is * the updated {@link InputSpecUpdate} for that data source. If none * specified, a default value is used. See {@link InputSpecUpdate} * for details. * @param locationHint * the placement policy for tasks specified at * {@link VertexLocationHint}s * @param parallelism * New number of tasks in the vertex */ public void reconfigureVertex(@Nullable Map rootInputSpecUpdate, @Nullable VertexLocationHint locationHint, int parallelism); /** * Allows a VertexManagerPlugin to assign Events for Root Inputs * * For regular Event Routing changes - the EdgeManager should be configured * via the setVertexParallelism method * * @param inputName * The input name associated with the event * @param events * The list of Events to be assigned to various tasks belonging to * the Vertex. The target index on individual events represents the * task to which events need to be sent. */ public void addRootInputEvents(String inputName, Collection events); /** * Allows a VertexManagerPlugin to send events of custom payload to processor * of a specific task of managed vertex * * It's up to user to make sure taskId is valid * * @param events events to be sent * @param taskId id of a task of managed vertex */ public void sendEventToProcessor(Collection events, int taskId); @Deprecated /** * Replaced by {@link #scheduleTasks(List)} * Notify the vertex to start the given tasks * @param tasks Indices of the tasks to be started */ public void scheduleVertexTasks(List tasks); /** * Notify the vertex to schedule the given tasks * @param tasks Identifier and metadata for the tasks to schedule */ public void scheduleTasks(List tasks); /** * Get the names of the non-vertex inputs of this vertex. These are primary * sources of data. * @return Names of inputs to this vertex. Maybe null if there are no inputs */ @Nullable public Set getVertexInputNames(); /** * Set the placement hint for tasks in this vertex * * @param locationHint */ public void setVertexLocationHint(VertexLocationHint locationHint); /** * @return DAG Attempt number */ public int getDAGAttemptNumber(); /** * Register to get notifications on updates to the specified vertex. Notifications will be sent * via {@link VertexManagerPlugin#onVertexStateUpdated(org.apache.tez.dag.api.event.VertexStateUpdate)} * * This method can only be invoked once. Duplicate invocations will result in an error. * * @param vertexName the vertex name for which notifications are required. * @param stateSet the set of states for which notifications are required. null implies all */ void registerForVertexStateUpdates(String vertexName, @Nullable Set stateSet); /** * Optional API. No need to call this when the vertex is not fully defined to * start with. E.g. vertex parallelism is not defined, or edges are not * configured. In that case, Tez will assume that the vertex needs * reconfiguration. If the vertex is already fully defined, but the * {@link VertexManagerPlugin} wants to reconfigure the vertex, then it must * use this API to inform Tez about its intention. Without invoking this * method, it is invalid to re-configure the vertex if * the vertex is already fully defined. This can be invoked at any time until * {@link VertexManagerPlugin#initialize()} has completed. Its invalid to * invoke this method after {@link VertexManagerPlugin#initialize()} has * completed
* If this API is invoked, then {@link #doneReconfiguringVertex()} must be * invoked after the {@link VertexManagerPlugin} is done reconfiguring the * vertex, . Actions like scheduling tasks or sending events do not count as * reconfiguration. */ public void vertexReconfigurationPlanned(); /** * Optional API. This needs to be called only if {@link #vertexReconfigurationPlanned()} has been * invoked. This must be called after {@link #vertexReconfigurationPlanned()} is called. */ public void doneReconfiguringVertex(); /** * Optional API. This API can be invoked to declare that the * {@link VertexManagerPlugin} is done with its work. After this the system * will not invoke the plugin methods any more. Its invalid for the plugin to * make further invocations of the context APIs after this. This can be used * to stop receiving further {@link VertexState} notifications after the * plugin has made all changes. */ // TODO must be done later after TEZ-1714 //public void vertexManagerDone(); /** * Get input vertex groups of this vertex, including vertex group name and * all members vertex name * * @return map whose key is vertex group name and value is list of members' name, * or empty map if there is no input vertex group. */ Map> getInputVertexGroups(); }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy