All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.streaming.api.graph.StreamGraphHasherV2 Maven / Gradle / Ivy

There is a newer version: 2.0-preview1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.graph;

import org.apache.flink.api.dag.Transformation;
import org.apache.flink.runtime.jobgraph.JobVertexID;
import org.apache.flink.streaming.api.operators.UdfStreamOperatorFactory;

import org.apache.flink.shaded.guava30.com.google.common.hash.HashFunction;
import org.apache.flink.shaded.guava30.com.google.common.hash.Hasher;
import org.apache.flink.shaded.guava30.com.google.common.hash.Hashing;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.charset.Charset;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import java.util.Set;

import static org.apache.flink.util.StringUtils.byteToHexString;

/**
 * StreamGraphHasher from Flink 1.2. This contains duplicated code to ensure that the algorithm does
 * not change with future Flink versions.
 *
 * 

DO NOT MODIFY THIS CLASS */ public class StreamGraphHasherV2 implements StreamGraphHasher { private static final Logger LOG = LoggerFactory.getLogger(StreamGraphHasherV2.class); /** * Returns a map with a hash for each {@link StreamNode} of the {@link StreamGraph}. The hash is * used as the {@link JobVertexID} in order to identify nodes across job submissions if they * didn't change. * *

The complete {@link StreamGraph} is traversed. The hash is either computed from the * transformation's user-specified id (see {@link Transformation#getUid()}) or generated in a * deterministic way. * *

The generated hash is deterministic with respect to: * *

    *
  • node-local properties (node ID), *
  • chained output nodes, and *
  • input nodes hashes *
* * @return A map from {@link StreamNode#id} to hash as 16-byte array. */ @Override public Map traverseStreamGraphAndGenerateHashes(StreamGraph streamGraph) { // The hash function used to generate the hash final HashFunction hashFunction = Hashing.murmur3_128(0); final Map hashes = new HashMap<>(); Set visited = new HashSet<>(); Queue remaining = new ArrayDeque<>(); // We need to make the source order deterministic. The source IDs are // not returned in the same order, which means that submitting the same // program twice might result in different traversal, which breaks the // deterministic hash assignment. List sources = new ArrayList<>(); for (Integer sourceNodeId : streamGraph.getSourceIDs()) { sources.add(sourceNodeId); } Collections.sort(sources); // // Traverse the graph in a breadth-first manner. Keep in mind that // the graph is not a tree and multiple paths to nodes can exist. // // Start with source nodes for (Integer sourceNodeId : sources) { remaining.add(streamGraph.getStreamNode(sourceNodeId)); visited.add(sourceNodeId); } StreamNode currentNode; while ((currentNode = remaining.poll()) != null) { // Generate the hash code. Because multiple path exist to each // node, we might not have all required inputs available to // generate the hash code. if (generateNodeHash( currentNode, hashFunction, hashes, streamGraph.isChainingEnabled(), streamGraph)) { // Add the child nodes for (StreamEdge outEdge : currentNode.getOutEdges()) { StreamNode child = streamGraph.getTargetVertex(outEdge); if (!visited.contains(child.getId())) { remaining.add(child); visited.add(child.getId()); } } } else { // We will revisit this later. visited.remove(currentNode.getId()); } } return hashes; } /** * Generates a hash for the node and returns whether the operation was successful. 
* * @param node The node to generate the hash for * @param hashFunction The hash function to use * @param hashes The current state of generated hashes * @return true if the node hash has been generated. false, otherwise. * If the operation is not successful, the hash needs be generated at a later point when all * input is available. * @throws IllegalStateException If node has user-specified hash and is intermediate node of a * chain */ private boolean generateNodeHash( StreamNode node, HashFunction hashFunction, Map hashes, boolean isChainingEnabled, StreamGraph streamGraph) { // Check for user-specified ID String userSpecifiedHash = node.getTransformationUID(); if (userSpecifiedHash == null) { // Check that all input nodes have their hashes computed for (StreamEdge inEdge : node.getInEdges()) { // If the input node has not been visited yet, the current // node will be visited again at a later point when all input // nodes have been visited and their hashes set. if (!hashes.containsKey(inEdge.getSourceId())) { return false; } } Hasher hasher = hashFunction.newHasher(); byte[] hash = generateDeterministicHash(node, hasher, hashes, isChainingEnabled, streamGraph); if (hashes.put(node.getId(), hash) != null) { // Sanity check throw new IllegalStateException( "Unexpected state. Tried to add node hash " + "twice. This is probably a bug in the JobGraph generator."); } return true; } else { Hasher hasher = hashFunction.newHasher(); byte[] hash = generateUserSpecifiedHash(node, hasher); for (byte[] previousHash : hashes.values()) { if (Arrays.equals(previousHash, hash)) { throw new IllegalArgumentException( "Hash collision on user-specified ID " + "\"" + userSpecifiedHash + "\". " + "Most likely cause is a non-unique ID. Please check that all IDs " + "specified via `uid(String)` are unique."); } } if (hashes.put(node.getId(), hash) != null) { // Sanity check throw new IllegalStateException( "Unexpected state. Tried to add node hash " + "twice. 
This is probably a bug in the JobGraph generator."); } return true; } } /** Generates a hash from a user-specified ID. */ private byte[] generateUserSpecifiedHash(StreamNode node, Hasher hasher) { hasher.putString(node.getTransformationUID(), Charset.forName("UTF-8")); return hasher.hash().asBytes(); } /** Generates a deterministic hash from node-local properties and input and output edges. */ private byte[] generateDeterministicHash( StreamNode node, Hasher hasher, Map hashes, boolean isChainingEnabled, StreamGraph streamGraph) { // Include stream node to hash. We use the current size of the computed // hashes as the ID. We cannot use the node's ID, because it is // assigned from a static counter. This will result in two identical // programs having different hashes. generateNodeLocalHash(hasher, hashes.size()); // Include chained nodes to hash for (StreamEdge outEdge : node.getOutEdges()) { if (isChainable(outEdge, isChainingEnabled, streamGraph)) { // Use the hash size again, because the nodes are chained to // this node. This does not add a hash for the chained nodes. generateNodeLocalHash(hasher, hashes.size()); } } byte[] hash = hasher.hash().asBytes(); // Make sure that all input nodes have their hash set before entering // this loop (calling this method). for (StreamEdge inEdge : node.getInEdges()) { byte[] otherHash = hashes.get(inEdge.getSourceId()); // Sanity check if (otherHash == null) { throw new IllegalStateException( "Missing hash for input node " + streamGraph.getSourceVertex(inEdge) + ". 
Cannot generate hash for " + node + "."); } for (int j = 0; j < hash.length; j++) { hash[j] = (byte) (hash[j] * 37 ^ otherHash[j]); } } if (LOG.isDebugEnabled()) { String udfClassName = ""; if (node.getOperatorFactory() instanceof UdfStreamOperatorFactory) { udfClassName = ((UdfStreamOperatorFactory) node.getOperatorFactory()) .getUserFunctionClassName(); } LOG.debug( "Generated hash '" + byteToHexString(hash) + "' for node " + "'" + node.toString() + "' {id: " + node.getId() + ", " + "parallelism: " + node.getParallelism() + ", " + "user function: " + udfClassName + "}"); } return hash; } /** * Applies the {@link Hasher} to the {@link StreamNode} . The hasher encapsulates the current * state of the hash. * *

The specified ID is local to this node. We cannot use the {@link StreamNode#id}, because * it is incremented in a static counter. Therefore, the IDs for identical jobs will otherwise * be different. */ private void generateNodeLocalHash(Hasher hasher, int id) { // This resolves conflicts for otherwise identical source nodes. BUT // the generated hash codes depend on the ordering of the nodes in the // stream graph. hasher.putInt(id); } private boolean isChainable( StreamEdge edge, boolean isChainingEnabled, StreamGraph streamGraph) { return isChainingEnabled && StreamingJobGraphGenerator.isChainable(edge, streamGraph); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy