
com.hazelcast.jet.core.Edge

/*
 * Copyright (c) 2008-2018, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.core;

import com.hazelcast.jet.config.EdgeConfig;
import com.hazelcast.jet.function.DistributedFunction;
import com.hazelcast.jet.impl.MasterContext;
import com.hazelcast.jet.impl.SerializationConstants;
import com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject;
import com.hazelcast.nio.ObjectDataInput;
import com.hazelcast.nio.ObjectDataOutput;
import com.hazelcast.nio.serialization.IdentifiedDataSerializable;
import com.hazelcast.util.UuidUtil;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.io.Serializable;
import java.util.Map;

import static com.hazelcast.jet.core.Partitioner.defaultPartitioner;
import static com.hazelcast.jet.function.DistributedFunctions.wholeItem;
import static com.hazelcast.jet.impl.util.Util.checkSerializable;

/**
 * Represents an edge between two {@link Vertex vertices} in a {@link DAG}.
 * Conceptually, data travels over the edge from the source vertex to the
 * destination vertex. Practically, since the vertex is distributed across
 * the cluster and across threads in each cluster member, the edge is
 * implemented by a number of concurrent queues and network sender/receiver
 * pairs.
 * <p>
 * It is often desirable to arrange that all items belonging to the same
 * collation key are received by the same processing unit (instance of
 * {@link Processor}). This is achieved by configuring an appropriate
 * {@link Partitioner} on the edge. The partitioner will determine the
 * partition ID of each item and all items with the same partition ID will
 * be routed to the same {@code Processor} instance. Depending on the value
 * of edge's <em>distributed</em> property, the processor will be unique
 * cluster-wide, or only within each member.
 * <p>
 * A newly instantiated Edge is non-distributed with a {@link
 * RoutingPolicy#UNICAST UNICAST} routing policy.
 */
public class Edge implements IdentifiedDataSerializable {

    private Vertex source; // transient field, restored during DAG deserialization
    private String sourceName;
    private int sourceOrdinal;

    private Vertex destination; // transient field, restored during DAG deserialization
    private String destName;
    private int destOrdinal;

    private int priority;
    private boolean isDistributed;
    private Partitioner<?> partitioner;
    private RoutingPolicy routingPolicy = RoutingPolicy.UNICAST;

    private EdgeConfig config;

    protected Edge() {
    }

    protected Edge(@Nonnull Vertex source, int sourceOrdinal, Vertex destination, int destOrdinal) {
        this.source = source;
        this.sourceName = source.getName();
        this.sourceOrdinal = sourceOrdinal;

        this.destination = destination;
        this.destName = destination != null ? destination.getName() : null;
        this.destOrdinal = destOrdinal;
    }

    /**
     * Returns an edge between two vertices. The ordinal of the edge
     * is 0 at both ends. Equivalent to {@code from(source).to(destination)}.
     *
     * @param source      the source vertex
     * @param destination the destination vertex
     */
    @Nonnull
    public static Edge between(@Nonnull Vertex source, @Nonnull Vertex destination) {
        return new Edge(source, 0, destination, 0);
    }

    /**
     * Returns an edge with the given source vertex and no destination vertex.
     * The ordinal of the edge is 0. Typically followed by one of the
     * {@code to()} method calls.
     */
    @Nonnull
    public static Edge from(@Nonnull Vertex source) {
        return from(source, 0);
    }

    /**
     * Returns an edge with the given source vertex at the given ordinal
     * and no destination vertex. Typically followed by a call to one of
     * the {@code to()} methods.
     */
    @Nonnull
    public static Edge from(@Nonnull Vertex source, int ordinal) {
        return new Edge(source, ordinal, null, 0);
    }

    /**
     * Sets the destination vertex of this edge, with ordinal 0.
     */
    @Nonnull
    public Edge to(@Nonnull Vertex destination) {
        this.destination = destination;
        this.destName = destination.getName();
        return this;
    }

    /**
     * Sets the destination vertex and ordinal of this edge.
     */
    @Nonnull
    public Edge to(@Nonnull Vertex destination, int ordinal) {
        this.destination = destination;
        this.destName = destination.getName();
        this.destOrdinal = ordinal;
        return this;
    }

    /**
     * Returns this edge's source vertex.
     */
    @Nonnull
    public Vertex getSource() {
        return source;
    }

    /**
     * Returns this edge's destination vertex.
     */
    public Vertex getDestination() {
        return destination;
    }

    /**
     * Returns the name of the source vertex.
     */
    @Nonnull
    public String getSourceName() {
        return sourceName;
    }

    /**
     * Returns the ordinal of the edge at the source vertex.
     */
    public int getSourceOrdinal() {
        return sourceOrdinal;
    }

    /**
     * Returns the name of the destination vertex.
     */
    public String getDestName() {
        return destName;
    }

    /**
     * Returns the ordinal of the edge at the destination vertex.
     */
    public int getDestOrdinal() {
        return destOrdinal;
    }

    /**
     * Sets the priority of the edge. A lower number means higher priority
     * and the default is 0.
     * <p>
     * Example: there are two incoming edges on a vertex, with priorities 1 and 2.
     * The data from the edge with priority 1 will be processed in full before
     * accepting any data from the edge with priority 2.
     * <p>
     * Note: having different priority edges will cause postponing of
     * the first snapshot until after upstream vertices of higher priority
     * edges are completed. Reason: after receiving a {@link
     * com.hazelcast.jet.impl.execution.SnapshotBarrier barrier} we stop
     * processing items on that edge until the barrier is received from all
     * other edges. However, we also don't process lower priority edges until
     * higher priority edges are done, which prevents receiving the barrier on
     * them, which in the end stalls the job indefinitely. Technically this
     * applies only to {@link
     * com.hazelcast.jet.config.ProcessingGuarantee#EXACTLY_ONCE EXACTLY_ONCE}
     * snapshot mode, but the snapshot is also postponed for {@link
     * com.hazelcast.jet.config.ProcessingGuarantee#AT_LEAST_ONCE
     * AT_LEAST_ONCE} jobs, because the snapshot won't complete until after all
     * higher priority edges are completed and will increase the number of
     * duplicately processed items.
     */
    @Nonnull
    public Edge priority(int priority) {
        if (priority == MasterContext.SNAPSHOT_RESTORE_EDGE_PRIORITY) {
            throw new IllegalArgumentException("priority must not be Integer.MIN_VALUE ("
                    + MasterContext.SNAPSHOT_RESTORE_EDGE_PRIORITY + ')');
        }
        this.priority = priority;
        return this;
    }

    /**
     * Returns the value of edge's priority, as explained on
     * {@link #priority(int)}.
     */
    public int getPriority() {
        return priority;
    }

    /**
     * Activates the {@link RoutingPolicy#PARTITIONED PARTITIONED} routing
     * policy and applies the {@link Partitioner#defaultPartitioner() default}
     * Hazelcast partitioning strategy. The strategy is applied to the result of
     * the {@code extractKeyFn} function.
     */
    @Nonnull
    public <T> Edge partitioned(@Nonnull DistributedFunction<T, ?> extractKeyFn) {
        return partitioned(extractKeyFn, defaultPartitioner());
    }

    /**
     * Activates the {@link RoutingPolicy#PARTITIONED PARTITIONED} routing
     * policy and applies the provided partitioning strategy. The strategy
     * is applied to the result of the {@code extractKeyFn} function.
     */
    @Nonnull
    public <T, K> Edge partitioned(
            @Nonnull DistributedFunction<T, K> extractKeyFn,
            @Nonnull Partitioner<? super K> partitioner
    ) {
        checkSerializable(extractKeyFn, "extractKeyFn");
        checkSerializable(partitioner, "partitioner");
        this.routingPolicy = RoutingPolicy.PARTITIONED;
        this.partitioner = new KeyPartitioner<>(extractKeyFn, partitioner);
        return this;
    }

    /**
     * Activates a special-cased {@link RoutingPolicy#PARTITIONED PARTITIONED}
     * routing policy where all items will be assigned the same, randomly
     * chosen partition ID. Therefore all items will be directed to the same
     * processor.
     */
    @Nonnull
    public Edge allToOne() {
        return partitioned(wholeItem(), new Single());
    }

    /**
     * Activates the {@link RoutingPolicy#BROADCAST BROADCAST} routing policy.
     */
    @Nonnull
    public Edge broadcast() {
        routingPolicy = RoutingPolicy.BROADCAST;
        return this;
    }

    /**
     * Activates the {@link RoutingPolicy#ISOLATED ISOLATED} routing policy
     * which establishes isolated paths from upstream to downstream processors.
     * Each downstream processor is assigned exactly one upstream processor and
     * each upstream processor is assigned a disjoint subset of downstream
     * processors. This allows the selective application of backpressure to
     * just one source processor that feeds a given downstream processor.
     * <p>
     * These restrictions imply that the downstream's local parallelism
     * cannot be less than upstream's. Since all traffic will be local, this
     * policy is not allowed on a distributed edge.
     */
    @Nonnull
    public Edge isolated() {
        routingPolicy = RoutingPolicy.ISOLATED;
        return this;
    }

    /**
     * Returns the instance encapsulating the partitioning strategy in effect
     * on this edge.
     */
    public Partitioner<?> getPartitioner() {
        return partitioner;
    }

    /**
     * Returns the {@link RoutingPolicy} in effect on the edge.
     */
    @Nonnull
    public RoutingPolicy getRoutingPolicy() {
        return routingPolicy;
    }

    /**
     * Declares that the edge is distributed. A non-distributed edge only
     * transfers data within the same member. If the data source running on
     * local member is distributed (produces only a slice of all the data on
     * any given member), the local processors will not observe all the data.
     * The same holds true when the data originates from an upstream
     * distributed edge.
     * <p>
     * A distributed edge allows all the data to be observed by all
     * the processors (using the {@link RoutingPolicy#BROADCAST BROADCAST}
     * routing policy) and, more attractively, all the data with a given
     * partition ID to be observed by the same unique processor, regardless of
     * whether it is running on the local or a remote member (using the {@link
     * RoutingPolicy#PARTITIONED PARTITIONED} routing policy).
     */
    public Edge distributed() {
        isDistributed = true;
        return this;
    }

    /**
     * Says whether this edge is distributed. The effects of this
     * property are discussed in {@link #distributed()}.
     */
    public boolean isDistributed() {
        return isDistributed;
    }

    /**
     * Returns the {@code EdgeConfig} instance associated with this edge.
     */
    public EdgeConfig getConfig() {
        return config;
    }

    /**
     * Assigns an {@code EdgeConfig} to this edge.
     */
    public Edge setConfig(EdgeConfig config) {
        this.config = config;
        return this;
    }

    @Nonnull @Override
    public String toString() {
        final StringBuilder b = new StringBuilder();
        if (sourceOrdinal == 0 && destOrdinal == 0) {
            b.append("between(\"").append(sourceName).append("\", \"").append(destName).append("\")");
        } else {
            b.append("from(\"").append(sourceName).append('"');
            if (sourceOrdinal != 0) {
                b.append(", ").append(sourceOrdinal);
            }
            b.append(").to(\"").append(destName).append('"');
            if (destOrdinal != 0) {
                b.append(", ").append(destOrdinal);
            }
            b.append(')');
        }
        switch (getRoutingPolicy()) {
            case UNICAST:
                break;
            case ISOLATED:
                b.append(".isolated()");
                break;
            case PARTITIONED:
                b.append(getPartitioner() instanceof Single ? ".allToOne()" : ".partitioned(?)");
                break;
            case BROADCAST:
                b.append(".broadcast()");
                break;
            default:
        }
        if (isDistributed()) {
            b.append(".distributed()");
        }
        if (getPriority() != 0) {
            b.append(".priority(").append(getPriority()).append(')');
        }
        return b.toString();
    }

    @Override
    public boolean equals(Object obj) {
        final Edge that;
        return this == obj
                || obj instanceof Edge
                    && this.sourceName.equals((that = (Edge) obj).sourceName)
                    && this.destName.equals(that.destName);
    }

    @Override
    public int hashCode() {
        return 37 * sourceName.hashCode() + destName.hashCode();
    }

    void restoreSourceAndDest(Map<String, Vertex> nameToVertex) {
        source = nameToVertex.get(sourceName);
        destination = nameToVertex.get(destName);
        assert source != null : "Couldn't restore source vertex " + sourceName + " from map " + nameToVertex;
        assert destination != null : "Couldn't restore destination vertex " + destName + " from map " + nameToVertex;
    }

    // Implementation of IdentifiedDataSerializable

    @Override
    public void writeData(@Nonnull ObjectDataOutput out) throws IOException {
        out.writeUTF(getSourceName());
        out.writeInt(getSourceOrdinal());
        out.writeUTF(getDestName());
        out.writeInt(getDestOrdinal());
        out.writeInt(getPriority());
        out.writeBoolean(isDistributed());
        out.writeObject(getRoutingPolicy());
        CustomClassLoadedObject.write(out, getPartitioner());
        out.writeObject(getConfig());
    }

    @Override
    public void readData(@Nonnull ObjectDataInput in) throws IOException {
        sourceName = in.readUTF();
        sourceOrdinal = in.readInt();
        destName = in.readUTF();
        destOrdinal = in.readInt();
        priority = in.readInt();
        isDistributed = in.readBoolean();
        routingPolicy = in.readObject();
        partitioner = CustomClassLoadedObject.read(in);
        config = in.readObject();
    }

    @Override
    public int getFactoryId() {
        return SerializationConstants.FACTORY_ID;
    }

    @Override
    public int getId() {
        return SerializationConstants.EDGE;
    }

    // END Implementation of IdentifiedDataSerializable


    /**
     * An edge describes a connection from many upstream processors to many
     * downstream processors. The routing policy decides where exactly to route
     * each particular item emitted from an upstream processor. To simplify
     * the reasoning we introduce the concept of the set of candidate
     * downstream processors, or the <em>candidate set</em> for short. On
     * a local edge the candidate set contains only local processors and on a
     * distributed edge it contains all the processors.
     */
    public enum RoutingPolicy implements Serializable {
        /**
         * For each item a single destination processor is chosen from the
         * candidate set, with no restriction on the choice.
         */
        UNICAST,
        /**
         * Like {@link #UNICAST}, but guarantees that any given downstream
         * processor receives data from exactly one upstream processor. This is
         * needed in some DAG setups to apply selective backpressure to individual
         * upstream source processors.
         * <p>
         * The downstream's local parallelism must not be less than the upstream's.
         * This policy is only available on a local edge.
         */
        ISOLATED,
        /**
         * Each item is sent to the one processor responsible for the item's
         * partition ID. On a distributed edge the processor is unique across the
         * cluster; on a non-distributed edge the processor is unique only within a
         * member.
         */
        PARTITIONED,
        /**
         * Each item is sent to all candidate processors.
         */
        BROADCAST
    }

    private static class Single implements Partitioner<Object> {

        private static final long serialVersionUID = 1L;

        private final String key;
        private int partition;

        Single() {
            key = UuidUtil.newUnsecureUuidString();
        }

        @Override
        public void init(DefaultPartitionStrategy strat) {
            partition = strat.getPartition(key);
        }

        @Override
        public int getPartition(Object item, int partitionCount) {
            return partition;
        }
    }

    private static final class KeyPartitioner<T, K> implements Partitioner<T> {

        private static final long serialVersionUID = 1L;

        private final DistributedFunction<T, K> keyExtractor;
        private final Partitioner<? super K> partitioner;

        KeyPartitioner(@Nonnull DistributedFunction<T, K> keyExtractor, @Nonnull Partitioner<? super K> partitioner) {
            this.keyExtractor = keyExtractor;
            this.partitioner = partitioner;
        }

        @Override
        public void init(DefaultPartitionStrategy strat) {
            partitioner.init(strat);
        }

        @Override
        public int getPartition(T item, int partitionCount) {
            return partitioner.getPartition(keyExtractor.apply(item), partitionCount);
        }
    }
}
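
For orientation, here is a minimal usage sketch showing how the builder-style methods above are typically combined when assembling a DAG. It assumes the companion DAG and Vertex classes from the same package, plus the Processors.noopP() placeholder supplier from com.hazelcast.jet.core.processor in the same Jet release; the vertex names are made up for illustration, and the snippet is not part of the Edge source.

import com.hazelcast.jet.core.DAG;
import com.hazelcast.jet.core.Edge;
import com.hazelcast.jet.core.Vertex;
import com.hazelcast.jet.core.processor.Processors;

import static com.hazelcast.jet.function.DistributedFunctions.wholeItem;

public class EdgeUsageSketch {
    public static void main(String[] args) {
        // Assumes DAG.newVertex, DAG.edge and Processors.noopP() from the same
        // Jet release as this Edge class; vertex names are hypothetical.
        DAG dag = new DAG();

        // Placeholder vertices; a real job would supply meaningful processors.
        Vertex upstream = dag.newVertex("upstream", Processors.noopP());
        Vertex downstream = dag.newVertex("downstream", Processors.noopP());

        // Partition items by their whole value and make the edge distributed,
        // so every item with a given partition ID is routed to one unique
        // processor cluster-wide.
        dag.edge(Edge.between(upstream, downstream)
                     .partitioned(wholeItem())
                     .distributed());
    }
}

The same Edge instance could instead be configured with broadcast(), isolated(), allToOne(), or a non-default priority(int), as described in the Javadoc above; only one routing policy is in effect at a time.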