
// com.hazelcast.jet.core.Partitioner Maven / Gradle / Ivy
/*
* Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.jet.core;
import javax.annotation.Nonnull;
import java.io.Serial;
import java.io.Serializable;
/**
* Encapsulates the logic associated with a {@link DAG} edge that decides
* on the partition ID of an item traveling over it. The partition ID
* determines which cluster member and which instance of {@link Processor} on
* that member an item will be forwarded to.
* <p>
* Jet's partitioning piggybacks on Hazelcast partitioning. Standard Hazelcast
* protocols are used to distribute partition ownership over the members of the
* cluster. However, if a DAG edge is configured as non-distributed, then on each
* member there will be some destination processor responsible for any given
* partition.
*
* @param <T> type of item the partitioner accepts
*
* @since Jet 3.0
*/
@FunctionalInterface
public interface Partitioner extends Serializable {
/**
* Partitioner which calls {@link Object#hashCode()} and coerces it with the
* modulo operation into the allowed range of partition IDs. The primary
* reason to prefer this over the default is performance, and it's a safe
* choice on local edges.
* <p>
* WARNING: this is a dangerous strategy to use on
* distributed edges. Care must be taken to ensure that the produced
* hashcode remains stable across serialization-deserialization cycles as
* well as across all JVM processes. Consider a {@code hashCode()} method
* that is correct with respect to its contract, but not with respect to
* the stricter contract given above. Take the following scenario:
* <ul>
* <li>there are two Jet cluster members;</li>
* <li>there is a DAG vertex;</li>
* <li>on each member there is a processor for this vertex;</li>
* <li>each processor emits an item;</li>
* <li>these two items have equal partitioning keys;</li>
* <li>nevertheless, on each member they get a different hashcode;</li>
* <li>they are routed to different processors, thus failing on the promise
* that all items with the same partition key go to the same processor.</li>
* </ul>
*/
Partitioner