All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.io.network.api.writer.RecordWriter Maven / Gradle / Ivy

There is a newer version: 1.13.6
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.io.network.api.writer;

import org.apache.flink.core.io.IOReadableWritable;
import org.apache.flink.metrics.Counter;
import org.apache.flink.metrics.SimpleCounter;
import org.apache.flink.runtime.event.AbstractEvent;
import org.apache.flink.runtime.io.network.api.serialization.EventSerializer;
import org.apache.flink.runtime.io.network.api.serialization.RecordSerializer;
import org.apache.flink.runtime.io.network.api.serialization.SpanningRecordSerializer;
import org.apache.flink.runtime.io.network.buffer.BufferBuilder;
import org.apache.flink.runtime.io.network.buffer.BufferConsumer;
import org.apache.flink.runtime.metrics.groups.TaskIOMetricGroup;
import org.apache.flink.util.XORShiftRandom;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Optional;
import java.util.Random;

import static org.apache.flink.runtime.io.network.api.serialization.RecordSerializer.SerializationResult;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkState;

/**
 * A record-oriented runtime result writer.
 *
 * 

The RecordWriter wraps the runtime's {@link ResultPartitionWriter} and takes care of * serializing records into buffers. * *

Important: it is necessary to call {@link #flushAll()} after * all records have been written with {@link #emit(IOReadableWritable)}. This * ensures that all produced records are written to the output stream (incl. * partially filled ones). * * @param the type of the record that can be emitted with this record writer */ public class RecordWriter { private static final Logger LOG = LoggerFactory.getLogger(RecordWriter.class); private final ResultPartitionWriter targetPartition; private final ChannelSelector channelSelector; private final int numberOfChannels; private final int[] broadcastChannels; private final RecordSerializer serializer; private final Optional[] bufferBuilders; private final Random rng = new XORShiftRandom(); private Counter numBytesOut = new SimpleCounter(); private Counter numBuffersOut = new SimpleCounter(); private final boolean flushAlways; /** Default name for teh output flush thread, if no name with a task reference is given. */ private static final String DEFAULT_OUTPUT_FLUSH_THREAD_NAME = "OutputFlusher"; /** The thread that periodically flushes the output, to give an upper latency bound. */ private final Optional outputFlusher; /** To avoid synchronization overhead on the critical path, best-effort error tracking is enough here.*/ private Throwable flusherException; RecordWriter(ResultPartitionWriter writer, ChannelSelector channelSelector, long timeout, String taskName) { this.targetPartition = writer; this.channelSelector = channelSelector; this.numberOfChannels = writer.getNumberOfSubpartitions(); this.channelSelector.setup(numberOfChannels); this.serializer = new SpanningRecordSerializer(); this.bufferBuilders = new Optional[numberOfChannels]; this.broadcastChannels = new int[numberOfChannels]; for (int i = 0; i < numberOfChannels; i++) { broadcastChannels[i] = i; bufferBuilders[i] = Optional.empty(); } checkArgument(timeout >= -1); this.flushAlways = (timeout == 0); if (timeout == -1 || timeout == 0) { outputFlusher = Optional.empty(); } else { String threadName = taskName == null ? DEFAULT_OUTPUT_FLUSH_THREAD_NAME : DEFAULT_OUTPUT_FLUSH_THREAD_NAME + " for " + taskName; outputFlusher = Optional.of(new OutputFlusher(threadName, timeout)); outputFlusher.get().start(); } } public void emit(T record) throws IOException, InterruptedException { checkErroneous(); emit(record, channelSelector.selectChannel(record)); } /** * This is used to broadcast Streaming Watermarks in-band with records. This ignores * the {@link ChannelSelector}. */ public void broadcastEmit(T record) throws IOException, InterruptedException { checkErroneous(); serializer.serializeRecord(record); boolean pruneAfterCopying = false; for (int channel : broadcastChannels) { if (copyFromSerializerToTargetChannel(channel)) { pruneAfterCopying = true; } } // Make sure we don't hold onto the large intermediate serialization buffer for too long if (pruneAfterCopying) { serializer.prune(); } } /** * This is used to send LatencyMarks to a random target channel. */ public void randomEmit(T record) throws IOException, InterruptedException { emit(record, rng.nextInt(numberOfChannels)); } private void emit(T record, int targetChannel) throws IOException, InterruptedException { serializer.serializeRecord(record); if (copyFromSerializerToTargetChannel(targetChannel)) { serializer.prune(); } } /** * @param targetChannel * @return true if the intermediate serialization buffer should be pruned */ private boolean copyFromSerializerToTargetChannel(int targetChannel) throws IOException, InterruptedException { // We should reset the initial position of the intermediate serialization buffer before // copying, so the serialization results can be copied to multiple target buffers. serializer.reset(); boolean pruneTriggered = false; BufferBuilder bufferBuilder = getBufferBuilder(targetChannel); SerializationResult result = serializer.copyToBufferBuilder(bufferBuilder); while (result.isFullBuffer()) { numBytesOut.inc(bufferBuilder.finish()); numBuffersOut.inc(); // If this was a full record, we are done. Not breaking out of the loop at this point // will lead to another buffer request before breaking out (that would not be a // problem per se, but it can lead to stalls in the pipeline). if (result.isFullRecord()) { pruneTriggered = true; bufferBuilders[targetChannel] = Optional.empty(); break; } bufferBuilder = requestNewBufferBuilder(targetChannel); result = serializer.copyToBufferBuilder(bufferBuilder); } checkState(!serializer.hasSerializedData(), "All data should be written at once"); if (flushAlways) { targetPartition.flush(targetChannel); } return pruneTriggered; } public void broadcastEvent(AbstractEvent event) throws IOException { try (BufferConsumer eventBufferConsumer = EventSerializer.toBufferConsumer(event)) { for (int targetChannel = 0; targetChannel < numberOfChannels; targetChannel++) { tryFinishCurrentBufferBuilder(targetChannel); // Retain the buffer so that it can be recycled by each channel of targetPartition targetPartition.addBufferConsumer(eventBufferConsumer.copy(), targetChannel); } if (flushAlways) { flushAll(); } } } public void flushAll() { targetPartition.flushAll(); } public void clearBuffers() { for (int targetChannel = 0; targetChannel < numberOfChannels; targetChannel++) { closeBufferBuilder(targetChannel); } } /** * Sets the metric group for this RecordWriter. */ public void setMetricGroup(TaskIOMetricGroup metrics) { numBytesOut = metrics.getNumBytesOutCounter(); numBuffersOut = metrics.getNumBuffersOutCounter(); } /** * Marks the current {@link BufferBuilder} as finished and clears the state for next one. */ private void tryFinishCurrentBufferBuilder(int targetChannel) { if (!bufferBuilders[targetChannel].isPresent()) { return; } BufferBuilder bufferBuilder = bufferBuilders[targetChannel].get(); bufferBuilders[targetChannel] = Optional.empty(); numBytesOut.inc(bufferBuilder.finish()); numBuffersOut.inc(); } /** * The {@link BufferBuilder} may already exist if not filled up last time, otherwise we need * request a new one for this target channel. */ private BufferBuilder getBufferBuilder(int targetChannel) throws IOException, InterruptedException { if (bufferBuilders[targetChannel].isPresent()) { return bufferBuilders[targetChannel].get(); } else { return requestNewBufferBuilder(targetChannel); } } private BufferBuilder requestNewBufferBuilder(int targetChannel) throws IOException, InterruptedException { checkState(!bufferBuilders[targetChannel].isPresent() || bufferBuilders[targetChannel].get().isFinished()); BufferBuilder bufferBuilder = targetPartition.getBufferBuilder(); bufferBuilders[targetChannel] = Optional.of(bufferBuilder); targetPartition.addBufferConsumer(bufferBuilder.createBufferConsumer(), targetChannel); return bufferBuilder; } private void closeBufferBuilder(int targetChannel) { if (bufferBuilders[targetChannel].isPresent()) { bufferBuilders[targetChannel].get().finish(); bufferBuilders[targetChannel] = Optional.empty(); } } /** * Closes the writer. This stops the flushing thread (if there is one). */ public void close() { clearBuffers(); // make sure we terminate the thread in any case if (outputFlusher.isPresent()) { outputFlusher.get().terminate(); try { outputFlusher.get().join(); } catch (InterruptedException e) { // ignore on close // restore interrupt flag to fast exit further blocking calls Thread.currentThread().interrupt(); } } } /** * Notifies the writer that the output flusher thread encountered an exception. * * @param t The exception to report. */ private void notifyFlusherException(Throwable t) { if (flusherException == null) { LOG.error("An exception happened while flushing the outputs", t); flusherException = t; } } private void checkErroneous() throws IOException { if (flusherException != null) { throw new IOException("An exception happened while flushing the outputs", flusherException); } } // ------------------------------------------------------------------------ /** * A dedicated thread that periodically flushes the output buffers, to set upper latency bounds. * *

The thread is daemonic, because it is only a utility thread. */ private class OutputFlusher extends Thread { private final long timeout; private volatile boolean running = true; OutputFlusher(String name, long timeout) { super(name); setDaemon(true); this.timeout = timeout; } public void terminate() { running = false; interrupt(); } @Override public void run() { try { while (running) { try { Thread.sleep(timeout); } catch (InterruptedException e) { // propagate this if we are still running, because it should not happen // in that case if (running) { throw new Exception(e); } } // any errors here should let the thread come to a halt and be // recognized by the writer flushAll(); } } catch (Throwable t) { notifyFlusherException(t); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy