All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.runtime.checkpoint.channel.ChannelStateCheckpointWriter Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.checkpoint.channel;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.runtime.checkpoint.channel.ChannelStateWriter.ChannelStateWriteResult;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import org.apache.flink.runtime.io.network.logger.NetworkActionsLogger;
import org.apache.flink.runtime.state.AbstractChannelStateHandle;
import org.apache.flink.runtime.state.AbstractChannelStateHandle.StateContentMetaInfo;
import org.apache.flink.runtime.state.CheckpointStateOutputStream;
import org.apache.flink.runtime.state.CheckpointStreamFactory;
import org.apache.flink.runtime.state.InputChannelStateHandle;
import org.apache.flink.runtime.state.ResultSubpartitionStateHandle;
import org.apache.flink.runtime.state.StreamStateHandle;
import org.apache.flink.runtime.state.memory.ByteStreamStateHandle;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.function.RunnableWithException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.concurrent.NotThreadSafe;

import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;

import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static java.util.UUID.randomUUID;
import static org.apache.flink.runtime.state.CheckpointedStateScope.EXCLUSIVE;
import static org.apache.flink.util.ExceptionUtils.findThrowable;
import static org.apache.flink.util.ExceptionUtils.rethrow;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;

/** Writes channel state for a specific checkpoint-subtask-attempt triple. */
@NotThreadSafe
class ChannelStateCheckpointWriter {
    private static final Logger LOG = LoggerFactory.getLogger(ChannelStateCheckpointWriter.class);

    private final DataOutputStream dataStream;
    private final CheckpointStateOutputStream checkpointStream;
    private final ChannelStateWriteResult result;
    private final Map inputChannelOffsets = new HashMap<>();
    private final Map resultSubpartitionOffsets =
            new HashMap<>();
    private final ChannelStateSerializer serializer;
    private final long checkpointId;
    private boolean allInputsReceived = false;
    private boolean allOutputsReceived = false;
    private final RunnableWithException onComplete;
    private final int subtaskIndex;
    private String taskName;

    ChannelStateCheckpointWriter(
            String taskName,
            int subtaskIndex,
            CheckpointStartRequest startCheckpointItem,
            CheckpointStreamFactory streamFactory,
            ChannelStateSerializer serializer,
            RunnableWithException onComplete)
            throws Exception {
        this(
                taskName,
                subtaskIndex,
                startCheckpointItem.getCheckpointId(),
                startCheckpointItem.getTargetResult(),
                streamFactory.createCheckpointStateOutputStream(EXCLUSIVE),
                serializer,
                onComplete);
    }

    @VisibleForTesting
    ChannelStateCheckpointWriter(
            String taskName,
            int subtaskIndex,
            long checkpointId,
            ChannelStateWriteResult result,
            CheckpointStateOutputStream stream,
            ChannelStateSerializer serializer,
            RunnableWithException onComplete) {
        this(
                taskName,
                subtaskIndex,
                checkpointId,
                result,
                serializer,
                onComplete,
                stream,
                new DataOutputStream(stream));
    }

    @VisibleForTesting
    ChannelStateCheckpointWriter(
            String taskName,
            int subtaskIndex,
            long checkpointId,
            ChannelStateWriteResult result,
            ChannelStateSerializer serializer,
            RunnableWithException onComplete,
            CheckpointStateOutputStream checkpointStateOutputStream,
            DataOutputStream dataStream) {
        this.taskName = taskName;
        this.subtaskIndex = subtaskIndex;
        this.checkpointId = checkpointId;
        this.result = checkNotNull(result);
        this.checkpointStream = checkNotNull(checkpointStateOutputStream);
        this.serializer = checkNotNull(serializer);
        this.dataStream = checkNotNull(dataStream);
        this.onComplete = checkNotNull(onComplete);
        runWithChecks(() -> serializer.writeHeader(dataStream));
    }

    void writeInput(InputChannelInfo info, Buffer buffer) {
        write(
                inputChannelOffsets,
                info,
                buffer,
                !allInputsReceived,
                "ChannelStateCheckpointWriter#writeInput");
    }

    void writeOutput(ResultSubpartitionInfo info, Buffer buffer) {
        write(
                resultSubpartitionOffsets,
                info,
                buffer,
                !allOutputsReceived,
                "ChannelStateCheckpointWriter#writeOutput");
    }

    private  void write(
            Map offsets,
            K key,
            Buffer buffer,
            boolean precondition,
            String action) {
        try {
            if (result.isDone()) {
                return;
            }
            runWithChecks(
                    () -> {
                        checkState(precondition);
                        long offset = checkpointStream.getPos();
                        try (AutoCloseable ignored =
                                NetworkActionsLogger.measureIO(action, buffer)) {
                            serializer.writeData(dataStream, buffer);
                        }
                        long size = checkpointStream.getPos() - offset;
                        offsets.computeIfAbsent(key, unused -> new StateContentMetaInfo())
                                .withDataAdded(offset, size);
                        NetworkActionsLogger.tracePersist(
                                action, buffer, taskName, key, checkpointId);
                    });
        } finally {
            buffer.recycleBuffer();
        }
    }

    void completeInput() throws Exception {
        LOG.debug("complete input, output completed: {}", allOutputsReceived);
        complete(!allInputsReceived, () -> allInputsReceived = true);
    }

    void completeOutput() throws Exception {
        LOG.debug("complete output, input completed: {}", allInputsReceived);
        complete(!allOutputsReceived, () -> allOutputsReceived = true);
    }

    private void complete(boolean precondition, RunnableWithException complete) throws Exception {
        if (result.isDone()) {
            // likely after abort - only need to set the flag run onComplete callback
            doComplete(precondition, complete, onComplete);
        } else {
            runWithChecks(
                    () ->
                            doComplete(
                                    precondition,
                                    complete,
                                    onComplete,
                                    this::finishWriteAndResult));
        }
    }

    private void finishWriteAndResult() throws IOException {
        if (inputChannelOffsets.isEmpty() && resultSubpartitionOffsets.isEmpty()) {
            dataStream.close();
            result.inputChannelStateHandles.complete(emptyList());
            result.resultSubpartitionStateHandles.complete(emptyList());
            return;
        }
        dataStream.flush();
        StreamStateHandle underlying = checkpointStream.closeAndGetHandle();
        complete(
                underlying,
                result.inputChannelStateHandles,
                inputChannelOffsets,
                HandleFactory.INPUT_CHANNEL);
        complete(
                underlying,
                result.resultSubpartitionStateHandles,
                resultSubpartitionOffsets,
                HandleFactory.RESULT_SUBPARTITION);
    }

    private void doComplete(
            boolean precondition,
            RunnableWithException complete,
            RunnableWithException... callbacks)
            throws Exception {
        Preconditions.checkArgument(precondition);
        complete.run();
        if (allInputsReceived && allOutputsReceived) {
            for (RunnableWithException callback : callbacks) {
                callback.run();
            }
        }
    }

    private > void complete(
            StreamStateHandle underlying,
            CompletableFuture> future,
            Map offsets,
            HandleFactory handleFactory)
            throws IOException {
        final Collection handles = new ArrayList<>();
        for (Map.Entry e : offsets.entrySet()) {
            handles.add(createHandle(handleFactory, underlying, e.getKey(), e.getValue()));
        }
        future.complete(handles);
        LOG.debug(
                "channel state write completed, checkpointId: {}, handles: {}",
                checkpointId,
                handles);
    }

    private > H createHandle(
            HandleFactory handleFactory,
            StreamStateHandle underlying,
            I channelInfo,
            StateContentMetaInfo contentMetaInfo)
            throws IOException {
        Optional bytes =
                underlying.asBytesIfInMemory(); // todo: consider restructuring channel state and
        // removing this method:
        // https://issues.apache.org/jira/browse/FLINK-17972
        if (bytes.isPresent()) {
            StreamStateHandle extracted =
                    new ByteStreamStateHandle(
                            randomUUID().toString(),
                            serializer.extractAndMerge(bytes.get(), contentMetaInfo.getOffsets()));
            return handleFactory.create(
                    subtaskIndex,
                    channelInfo,
                    extracted,
                    singletonList(serializer.getHeaderLength()),
                    extracted.getStateSize());
        } else {
            return handleFactory.create(
                    subtaskIndex,
                    channelInfo,
                    underlying,
                    contentMetaInfo.getOffsets(),
                    contentMetaInfo.getSize());
        }
    }

    private void runWithChecks(RunnableWithException r) {
        try {
            checkState(!result.isDone(), "result is already completed", result);
            r.run();
        } catch (Exception e) {
            fail(e);
            if (!findThrowable(e, IOException.class).isPresent()) {
                rethrow(e);
            }
        }
    }

    public void fail(Throwable e) {
        result.fail(e);
        try {
            checkpointStream.close();
        } catch (Exception closeException) {
            String message = "Unable to close checkpointStream after a failure";
            if (findThrowable(closeException, IOException.class).isPresent()) {
                LOG.warn(message, closeException);
            } else {
                throw new RuntimeException(message, closeException);
            }
        }
    }

    private interface HandleFactory> {
        H create(
                int subtaskIndex,
                I info,
                StreamStateHandle underlying,
                List offsets,
                long size);

        HandleFactory INPUT_CHANNEL =
                InputChannelStateHandle::new;

        HandleFactory RESULT_SUBPARTITION =
                ResultSubpartitionStateHandle::new;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy