All downloads are free. The search and download functionality uses the official Maven repository.

com.hazelcast.jet.core.Vertex Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.core;

import com.hazelcast.function.SupplierEx;
import com.hazelcast.jet.impl.ProcessorClassLoaderTLHolder;
import com.hazelcast.jet.impl.execution.init.CustomClassLoadedObject;
import com.hazelcast.nio.ObjectDataInput;
import com.hazelcast.nio.ObjectDataOutput;
import com.hazelcast.nio.serialization.IdentifiedDataSerializable;
import com.hazelcast.spi.annotation.PrivateApi;

import javax.annotation.Nonnull;
import java.io.IOException;
import java.util.function.UnaryOperator;

import static com.hazelcast.internal.util.Preconditions.checkNotNull;
import static com.hazelcast.jet.impl.util.Util.checkSerializable;
import static com.hazelcast.jet.impl.util.Util.doWithClassLoader;
import static java.lang.Math.min;

/**
 * Represents a unit of data processing in a Jet computation job. Conceptually,
 * a vertex receives data items over its inbound {@link Edge edges} and pushes
 * data items to its outbound edges. Practically, a single vertex is represented
 * by a set of instances of {@link Processor}. The {@code localParallelism} property
 * determines the number of processor instances running on each cluster member.
 * 

* Each processor is assigned a set of partition IDs it is responsible for. When * an inbound edge is partitioned, the processor will receive only those * data items whose partition ID it is responsible for. For data traveling over a * partitioned edge which is also distributed, the whole cluster contains * a single unique processor instance responsible for any given partition ID. For * non-distributed edges, the processor is unique only within a member and each * member has its own processor for any given partition ID. Finally, there is a * guarantee of collation across all the partitioned edges impinging on a vertex: * within each member, all the data with a given partition ID is received by the * same processor. *

* A vertex is uniquely identified in a DAG by its name. * * @since Jet 3.0 */ public class Vertex implements IdentifiedDataSerializable { /** * The value of {@link #localParallelism(int)} with the meaning * "use the default local parallelism". */ public static final int LOCAL_PARALLELISM_USE_DEFAULT = -1; private boolean locked; private ProcessorMetaSupplier metaSupplier; private String name; private int localParallelism = -1; /** * Constructor used internally for deserialization. */ Vertex() { } /** * Creates a vertex from a {@code Supplier}. *

* This is useful for vertices where all the {@code Processor} instances * will be instantiated the same way. *

* NOTE: this constructor should not be abused with a stateful * implementation which produces a different processor each time. In such a * case the full {@code ProcessorSupplier} type should be implemented. * * @param name the unique name of the vertex. This name identifies the vertex in the snapshot * @param processorSupplier the simple, parameterless supplier of {@code Processor} instances */ public Vertex(@Nonnull String name, @Nonnull SupplierEx processorSupplier) { this(name, ProcessorMetaSupplier.of(processorSupplier)); } /** * Creates a vertex from a {@code ProcessorSupplier}. * * @param name the unique name of the vertex. This name identifies the vertex in the snapshot * @param processorSupplier the supplier of {@code Processor} instances which will be used on all members */ public Vertex(@Nonnull String name, @Nonnull ProcessorSupplier processorSupplier) { this(name, ProcessorMetaSupplier.of(processorSupplier)); } /** * Creates a vertex from a {@code ProcessorMetaSupplier}. * * @param name the unique name of the vertex. This name identifies the vertex in the snapshot * @param metaSupplier the meta-supplier of {@code ProcessorSupplier}s for each member * */ public Vertex(@Nonnull String name, @Nonnull ProcessorMetaSupplier metaSupplier) { checkNotNull(name, "name"); checkNotNull(metaSupplier, "supplier"); checkSerializable(metaSupplier, "metaSupplier"); this.metaSupplier = metaSupplier; this.name = name; } /** * Says whether the given integer is valid as the value of {@link * #localParallelism(int) localParallelism}. */ public static int checkLocalParallelism(int parallelism) { if (parallelism != LOCAL_PARALLELISM_USE_DEFAULT && parallelism <= 0) { throw new IllegalArgumentException("Parallelism must be either -1 or a positive number"); } return parallelism; } /** * Determines the local parallelism value for the vertex by looking at * its local parallelism and meta supplier's preferred local parallelism. *

* If none of them is set, returns the provided default parallelism */ public int determineLocalParallelism(int defaultParallelism) { int localParallelism = this.localParallelism; int preferredLocalParallelism = this.metaSupplier.preferredLocalParallelism(); checkLocalParallelism(preferredLocalParallelism); checkLocalParallelism(localParallelism); return localParallelism != LOCAL_PARALLELISM_USE_DEFAULT ? localParallelism : preferredLocalParallelism != LOCAL_PARALLELISM_USE_DEFAULT ? defaultParallelism == LOCAL_PARALLELISM_USE_DEFAULT ? preferredLocalParallelism : min(preferredLocalParallelism, defaultParallelism) : defaultParallelism; } /** * Sets the number of processors corresponding to this vertex that will be * created on each member. *

* If the value is {@value #LOCAL_PARALLELISM_USE_DEFAULT}, Jet will * determine the vertex's local parallelism during job initialization * from the global default and processor meta-supplier's preferred value. */ @Nonnull public Vertex localParallelism(int localParallelism) { throwIfLocked(); this.localParallelism = checkLocalParallelism(localParallelism); return this; } /** * Returns the number of processors corresponding to this vertex that will * be created on each member. A value of {@code -1} means that this * property is not set; in that case the default configured on the Jet * instance will be used. */ public int getLocalParallelism() { return localParallelism; } /** * Returns the name of this vertex. */ @Nonnull public String getName() { return name; } /** * Returns this vertex's meta-supplier of processors. */ @Nonnull public ProcessorMetaSupplier getMetaSupplier() { return metaSupplier; } /** * Applies the provided operator function to the current processor * meta-supplier and replaces it with the one it returns. Typically used to * decorate the existing meta-supplier. 
*/ public void updateMetaSupplier(@Nonnull UnaryOperator updateFn) { throwIfLocked(); metaSupplier = updateFn.apply(metaSupplier); } @Override public String toString() { return "Vertex " + name; } // Implementation of IdentifiedDataSerializable @Override public void writeData(@Nonnull ObjectDataOutput out) throws IOException { out.writeInt(localParallelism); out.writeString(name); CustomClassLoadedObject.write(out, metaSupplier); } @Override public void readData(@Nonnull ObjectDataInput in) throws IOException { localParallelism = in.readInt(); name = in.readString(); metaSupplier = doWithClassLoader(ProcessorClassLoaderTLHolder.get(name), () -> CustomClassLoadedObject.read(in)); } @Override public int getFactoryId() { return JetDataSerializerHook.FACTORY_ID; } @Override public int getClassId() { return JetDataSerializerHook.VERTEX; } // END Implementation of IdentifiedDataSerializable private void throwIfLocked() { if (locked) { throw new IllegalStateException("Edge is already locked"); } } /** * Used to prevent further mutations this instance after submitting it for execution. *

* It's not a public API, can be removed in the future. */ @PrivateApi void lock() { locked = true; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy