All downloads are FREE. Search and download functionality uses the official Maven repository.

com.hazelcast.jet.Job Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet;

import com.hazelcast.config.MetricsConfig;
import com.hazelcast.jet.config.DeltaJobConfig;
import com.hazelcast.jet.config.JobConfig;
import com.hazelcast.jet.config.ProcessingGuarantee;
import com.hazelcast.jet.core.DAG;
import com.hazelcast.jet.core.JobNotFoundException;
import com.hazelcast.jet.core.JobStatus;
import com.hazelcast.jet.core.JobSuspensionCause;
import com.hazelcast.jet.core.metrics.JobMetrics;
import com.hazelcast.jet.pipeline.Pipeline;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.UUID;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;

/**
 * A handle to Jet computation job created by submitting a {@link DAG} or
 * {@link Pipeline} to the cluster. See {@link JetService} for methods to
 * submit jobs and to get a handle to an existing job.
 *
 * @since Jet 3.0
 */
public interface Job {

    /**
     * Returns {@code true} if this instance represents a {@linkplain
     * JetService#newLightJob(Pipeline) light job}. For a light job, many of
     * the methods in this interface throw {@link
     * UnsupportedOperationException}.
     */
    boolean isLightJob();

    /**
     * Returns the ID of this job.
     */
    long getId();

    /**
     * Returns the string representation of this job's ID.
     */
    @Nonnull
    default String getIdString() {
        return Util.idToString(getId());
    }

    /**
     * Waits for the job to complete and throws an exception if the job
     * completes with an error. Never returns for streaming (unbounded) jobs,
     * unless they fail or are cancelled. In rare cases it can happen that
     * after this method returns, the job is not yet fully cleaned up.
     * 

* Joining a suspended job will block until that job is resumed and * completes. *

* Shorthand for job.getFuture().join(). * * @throws CancellationException if the job was cancelled */ default void join() { getFuture().join(); } /** * Gets the future associated with the job. The returned future is * not cancellable. To cancel the job, the {@link #cancel()} method * should be used. * * @throws IllegalStateException if the job has not started yet. */ @Nonnull CompletableFuture getFuture(); /** * Makes a request to cancel this job and returns. The job will complete * after its execution has stopped on all the nodes, which can happen some * time after this method returns. *

* After cancellation, {@link #join()} will throw a {@link * CancellationException}. *

* If the job is already suspended, Jet will delete its runtime resources * and snapshots and it won't be able to resume again. *

* NOTE: if the cluster becomes unstable (a member leaves * or similar) while the job is in the process of cancellation, it may end * up getting restarted after the cluster has stabilized and won't be * cancelled. Call {@link #getStatus()} to find out and possibly try to * cancel again. *

* The job status will be {@link JobStatus#FAILED} after cancellation. *

* See {@link #cancelAndExportSnapshot(String)} to cancel with a terminal * snapshot. * * @throws IllegalStateException if the cluster is not in a state to * restart the job, for example when coordinator member left and new * coordinator did not yet load job's metadata. * @throws JobNotFoundException for light jobs, if the job already * completed */ void cancel(); /** * Returns the time when the job was submitted to the cluster. *

* The time is assigned by reading {@code System.currentTimeMillis()} of * the coordinator member that executes the job for the first time. It * doesn't change on restart. */ long getSubmissionTime(); /** * Returns the name of this job or {@code null} if no name was supplied. *

* Jobs can be named through {@link JobConfig#setName(String)} prior to * submission. For light jobs it always returns {@code null}. */ @Nullable String getName(); /** * Returns the current status of this job. Each invocation queries the * status from the job coordinator. For light jobs it returns only {@link * JobStatus#RUNNING RUNNING}, {@link JobStatus#COMPLETED COMPLETED} or * {@link JobStatus#FAILED FAILED}. */ @Nonnull JobStatus getStatus(); /** * Returns true, if the job is user-cancelled. Returns false, if it * completed normally or failed due to another error. Jobs running in * clusters before version 5.3 lack this information and will always return * false. * * @throws IllegalStateException if the job is not done. * @since 5.3 */ boolean isUserCancelled(); /** * Associates the given listener to this job. The listener is automatically * removed after a {@linkplain JobStatus#isTerminal terminal event}. * * @return The registration id * @throws UnsupportedOperationException if the cluster version is less than 5.3 * @throws IllegalStateException if the job is completed or failed * @since 5.3 */ UUID addStatusListener(@Nonnull JobStatusListener listener); /** * Stops delivering all events to the listener with the given registration id. * * @return Whether the specified registration was removed * @throws UnsupportedOperationException if the cluster version is less than 5.3 * @since 5.3 */ boolean removeStatusListener(@Nonnull UUID id); // ### Methods below apply only to normal (non-light) jobs. /** * Returns the configuration this job was submitted with. Changes made to * the returned config object will not have any effect. */ @Nonnull JobConfig getConfig(); /** * Applies the specified delta configuration to a {@link #suspend() * suspended} job and returns the updated configuration. 
* * @throws IllegalStateException if this job is not suspended * @throws UnsupportedOperationException if called for a light job * @since 5.3 */ JobConfig updateConfig(@Nonnull DeltaJobConfig deltaConfig); /** * Return a {@link JobSuspensionCause description of the cause} that has * led to the suspension of the job. Throws an {@code IllegalStateException} * if the job is not currently suspended. Not supported for light jobs. * * @since Jet 4.3 * * @throws UnsupportedOperationException if called for a light job */ @Nonnull JobSuspensionCause getSuspensionCause(); /** * Returns a snapshot of the current values of all job-specific metrics. * Not supported for light jobs. *

* While the job is running the metric values are updated periodically * (see {@linkplain MetricsConfig#setCollectionFrequencySeconds metrics * collection frequency}), assuming that both {@linkplain * MetricsConfig#setEnabled global metrics collection} and {@linkplain * JobConfig#setMetricsEnabled per-job metrics collection} are enabled. * Otherwise empty metrics will be returned. *

* Keep in mind that the collections may occur at different times on * each member, metrics from various members aren't from the same instant. *

* When a job is restarted (or resumed after being previously suspended) * the metrics are reset too, their values will reflect only updates * from the latest execution of the job. *

* Once a job completes successfully, the metrics will have their most * recent values (i.e. the last metric values from the moment before the * job completed), assuming that {@link * JobConfig#setStoreMetricsAfterJobCompletion(boolean) metrics storage} * was enabled. If a job fails, is cancelled or suspended, empty metrics * will be returned. * * @since Jet 3.2 * * @throws UnsupportedOperationException if called for a light job */ @Nonnull JobMetrics getMetrics(); /** * Gracefully stops the current execution and schedules a new execution * with the current member list of the Jet cluster. Can be called to * manually make use of added members, if {@linkplain * JobConfig#setAutoScaling auto scaling} is disabled. Only a running job * can be restarted; a suspended job must be {@linkplain #resume() resumed}. *

* Conceptually this call is equivalent to {@link #suspend()} & {@link * #resume()}. Not supported for light jobs. * * @throws IllegalStateException if the job is not running, for example it * has already completed, is not yet running, is already restarting, * suspended etc. * @throws UnsupportedOperationException if called for a light job */ void restart(); /** * Gracefully suspends the current execution of the job. The job's status * will become {@link JobStatus#SUSPENDED}. To resume the job, call {@link * #resume()}. Not supported for light jobs. *

* You can suspend a job even if it's not configured for {@linkplain * JobConfig#setProcessingGuarantee snapshotting}. Such a job will resume * with empty state, as if it has just been started. *

* This call just initiates the suspension process and doesn't wait for it * to complete. Suspension starts with creating a terminal state snapshot. * Should the terminal snapshot fail, the job will suspend anyway, but the * previous snapshot (if there was one) won't be deleted. When the job * resumes, its processing starts from the point of the last snapshot. *

* NOTE: if the cluster becomes unstable (a member leaves or * similar) while the job is in the process of being suspended, it may end up * getting immediately restarted. Call {@link #getStatus()} to find out and * possibly try to suspend again. * * @throws IllegalStateException if the job is not running * @throws UnsupportedOperationException if called for a light job */ void suspend(); /** * Resumes a {@linkplain #suspend suspended} job. The job will resume from * the last known successful snapshot, if there is one. Not supported for light jobs *

* If the job is not suspended, it does nothing. * * @throws UnsupportedOperationException if called for a light job */ void resume(); /** * Exports and saves a state snapshot with the given name, and then cancels * the job without processing any more data after the barrier (graceful * cancellation). It's similar to {@link #suspend()} followed by a {@link * #cancel()}, except that it won't process any more data after the * snapshot. Not supported for light jobs. *

* You can use the exported snapshot as a starting point for a new job. The * job doesn't need to execute the same Pipeline as the job that created it, * it must just be compatible with its state data. To achieve this, use * {@link JobConfig#setInitialSnapshotName(String)}. *

* Unlike {@link #exportSnapshot} method, when a snapshot is created using * this method Jet will commit the external transactions because this * snapshot is the last one created for the job and it's safe to use it to * continue the processing. *

* If the terminal snapshot fails, Jet will suspend this job instead of * cancelling it. *

* NOTE: if the cluster becomes unstable (a member leaves * or similar) while the job is in the process of being cancelled, it may * end up getting immediately restarted. Call {@link #getStatus()} to find * out and possibly try to cancel again. Should this restart happen, * created snapshot cannot be regarded as exported terminal snapshot. It * shall be neither overwritten nor deleted until there is a new snapshot * created, either automatic or via cancelAndExportSnapshot method, * otherwise data can be lost. If cancelAndExportSnapshot is used again, * ensure that there was a regular snapshot made or use different snapshot * name. *

* You can call this method for a suspended job, too: in that case it will * export the last successful snapshot and cancel the job. *

* The method call will block until it has fully exported the snapshot, but * may return before the job has stopped executing. *

* For more information about "exported state" see {@link * #exportSnapshot(String)}. *

* The job status will be {@link JobStatus#FAILED} after cancellation, * {@link Job#join()} will throw a {@link CancellationException}. * * @param name name of the snapshot. If name is already used, it will be * overwritten * * @throws JetException if the job is in an incorrect state: completed, * cancelled or is in the process of restarting or suspending. * @throws UnsupportedOperationException if called for a light job */ JobStateSnapshot cancelAndExportSnapshot(String name); /** * Exports a state snapshot and saves it under the given name. You can * start a new job using the exported state using {@link * JobConfig#setInitialSnapshotName(String)}. Not supported for light jobs. *

* The snapshot will be independent of the job that created it. Jet won't * automatically delete the IMap it is exported into. You must manually * call {@linkplain JobStateSnapshot#destroy() snapshot.destroy()} to * delete it. If your state is large, make sure you have enough memory to * store it. The snapshot created using this method will also not be used * for automatic restart - should the job fail, the previous automatically * saved snapshot will be used. *

* For transactional sources or sinks (that is those which use transactions * to confirm reads or to commit writes), Jet will not commit the * transactions when creating a snapshot using this method. The reason for * this is that such connectors only achieve exactly-once guarantee if the * job restarts from the latest snapshot. But, for example, if the job * fails after exporting a snapshot but before it creates a new automatic * one, the job would restart from the previous automatic snapshot and the * stored internal and committed external state will be from a different * point in time and a data loss will occur. *

* If a snapshot with the same name already exists, it will be * overwritten. If a snapshot is already in progress for this job (either * automatic or user-requested), the requested one will wait and start * immediately after the previous one completes. If a snapshot with the * same name is requested for two jobs at the same time, their data will * likely be damaged (similar to two processes writing to the same file). *

* You can call this method on a suspended job: in that case it will export * the last successful snapshot. You can also export the state of * non-snapshotted jobs (those with {@link ProcessingGuarantee#NONE}). *

* If you issue any graceful job-control actions such as a graceful member * shutdown or suspending a snapshotted job while Jet is exporting a * snapshot, they will wait in a queue for this snapshot to complete. * Forceful job-control actions will interrupt the export procedure. *

* You can access the exported state using {@link * JetService#getJobStateSnapshot(String)}. *

* The method call will block until it has fully exported the snapshot. * * @param name name of the snapshot. If name is already used, it will be * overwritten * * @throws JetException if the job is in an incorrect state: completed, * cancelled or is in the process of restarting or suspending. * @throws UnsupportedOperationException if called for a light job */ JobStateSnapshot exportSnapshot(String name); }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy