All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.runtime.checkpoint.channel.ChannelStateWriterImpl Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.checkpoint.channel;

import org.apache.flink.annotation.Internal;
import org.apache.flink.runtime.checkpoint.CheckpointOptions;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import org.apache.flink.runtime.state.CheckpointStateOutputStream;
import org.apache.flink.runtime.state.CheckpointStorageWorkerView;
import org.apache.flink.util.CloseableIterator;
import org.apache.flink.util.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.concurrent.ThreadSafe;

import java.io.IOException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import static org.apache.flink.runtime.checkpoint.channel.ChannelStateWriteRequest.completeInput;
import static org.apache.flink.runtime.checkpoint.channel.ChannelStateWriteRequest.completeOutput;
import static org.apache.flink.runtime.checkpoint.channel.ChannelStateWriteRequest.write;

/**
 * {@link ChannelStateWriter} implemented using {@link CheckpointStateOutputStream
 * CheckpointStateOutputStreams}. Internally, it has by default
 *
 * 
    *
  • one stream per checkpoint; having multiple streams would mean more files written and more * connections opened (and more latency on restore) *
  • one thread; having multiple threads means more connections, couples with the implementation * and increases complexity *
* *

Thread-safety: this class is thread-safe when used with a thread-safe {@link * ChannelStateWriteRequestExecutor executor} (e.g. default {@link * ChannelStateWriteRequestExecutorImpl}. */ @Internal @ThreadSafe public class ChannelStateWriterImpl implements ChannelStateWriter { private static final Logger LOG = LoggerFactory.getLogger(ChannelStateWriterImpl.class); private static final int DEFAULT_MAX_CHECKPOINTS = 1000; // includes max-concurrent-checkpoints + checkpoints to be aborted (scheduled via // mailbox) private final String taskName; private final ChannelStateWriteRequestExecutor executor; private final ConcurrentMap results; private final int maxCheckpoints; /** * Creates a {@link ChannelStateWriterImpl} with {@link #DEFAULT_MAX_CHECKPOINTS} as {@link * #maxCheckpoints}. */ public ChannelStateWriterImpl( String taskName, int subtaskIndex, CheckpointStorageWorkerView streamFactoryResolver) { this(taskName, subtaskIndex, streamFactoryResolver, DEFAULT_MAX_CHECKPOINTS); } /** * Creates a {@link ChannelStateWriterImpl} with {@link ChannelStateSerializerImpl default} * {@link ChannelStateSerializer}, and a {@link ChannelStateWriteRequestExecutorImpl}. * * @param taskName * @param streamFactoryResolver a factory to obtain output stream factory for a given checkpoint * @param maxCheckpoints maximum number of checkpoints to be written currently or finished but * not taken yet. 
*/ ChannelStateWriterImpl( String taskName, int subtaskIndex, CheckpointStorageWorkerView streamFactoryResolver, int maxCheckpoints) { this( taskName, new ConcurrentHashMap<>(maxCheckpoints), new ChannelStateWriteRequestExecutorImpl( taskName, new ChannelStateWriteRequestDispatcherImpl( taskName, subtaskIndex, streamFactoryResolver, new ChannelStateSerializerImpl())), maxCheckpoints); } ChannelStateWriterImpl( String taskName, ConcurrentMap results, ChannelStateWriteRequestExecutor executor, int maxCheckpoints) { this.taskName = taskName; this.results = results; this.maxCheckpoints = maxCheckpoints; this.executor = executor; } @Override public void start(long checkpointId, CheckpointOptions checkpointOptions) { LOG.debug("{} starting checkpoint {} ({})", taskName, checkpointId, checkpointOptions); ChannelStateWriteResult result = new ChannelStateWriteResult(); ChannelStateWriteResult put = results.computeIfAbsent( checkpointId, id -> { Preconditions.checkState( results.size() < maxCheckpoints, String.format( "%s can't start %d, results.size() > maxCheckpoints: %d > %d", taskName, checkpointId, results.size(), maxCheckpoints)); enqueue( new CheckpointStartRequest( checkpointId, result, checkpointOptions.getTargetLocation()), false); return result; }); Preconditions.checkArgument( put == result, taskName + " result future already present for checkpoint " + checkpointId); } @Override public void addInputData( long checkpointId, InputChannelInfo info, int startSeqNum, CloseableIterator iterator) { LOG.trace( "{} adding input data, checkpoint {}, channel: {}, startSeqNum: {}", taskName, checkpointId, info, startSeqNum); enqueue(write(checkpointId, info, iterator), false); } @Override public void addOutputData( long checkpointId, ResultSubpartitionInfo info, int startSeqNum, Buffer... data) { LOG.trace( "{} adding output data, checkpoint {}, channel: {}, startSeqNum: {}, num buffers: {}", taskName, checkpointId, info, startSeqNum, data == null ? 
0 : data.length); enqueue(write(checkpointId, info, data), false); } @Override public void finishInput(long checkpointId) { LOG.debug("{} finishing input data, checkpoint {}", taskName, checkpointId); enqueue(completeInput(checkpointId), false); } @Override public void finishOutput(long checkpointId) { LOG.debug("{} finishing output data, checkpoint {}", taskName, checkpointId); enqueue(completeOutput(checkpointId), false); } @Override public void abort(long checkpointId, Throwable cause, boolean cleanup) { LOG.debug("{} aborting, checkpoint {}", taskName, checkpointId); enqueue(ChannelStateWriteRequest.abort(checkpointId, cause), true); // abort already started enqueue( ChannelStateWriteRequest.abort(checkpointId, cause), false); // abort enqueued but not started if (cleanup) { results.remove(checkpointId); } } @Override public ChannelStateWriteResult getAndRemoveWriteResult(long checkpointId) { LOG.debug("{} requested write result, checkpoint {}", taskName, checkpointId); ChannelStateWriteResult result = results.remove(checkpointId); Preconditions.checkArgument( result != null, taskName + " channel state write result not found for checkpoint " + checkpointId); return result; } public void open() { executor.start(); } @Override public void close() throws IOException { LOG.debug("close, dropping checkpoints {}", results.keySet()); results.clear(); executor.close(); } private void enqueue(ChannelStateWriteRequest request, boolean atTheFront) { // state check and previous errors check are performed inside the worker try { if (atTheFront) { executor.submitPriority(request); } else { executor.submit(request); } } catch (Exception e) { RuntimeException wrapped = new RuntimeException("unable to send request to worker", e); try { request.cancel(e); } catch (Exception cancelException) { wrapped.addSuppressed(cancelException); } throw wrapped; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy