All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.runtime.io.network.partition.SpillableSubpartition Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.io.network.partition;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.configuration.TaskManagerOptions;
import org.apache.flink.runtime.io.disk.iomanager.BufferFileWriter;
import org.apache.flink.runtime.io.disk.iomanager.IOManager;
import org.apache.flink.runtime.io.network.api.EndOfPartitionEvent;
import org.apache.flink.runtime.io.network.api.serialization.EventSerializer;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import org.apache.flink.runtime.io.network.buffer.BufferConsumer;
import org.apache.flink.runtime.io.network.buffer.BufferPool;
import org.apache.flink.util.ExceptionUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.concurrent.LinkedBlockingQueue;

import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.Preconditions.checkState;

/**
 * A spillable sub partition starts out in-memory and spills to disk if asked
 * to do so.
 *
 * 

Buffers for the partition come from a {@link BufferPool}. The buffer pool * is also responsible to trigger the release of the buffers if it needs them * back. At this point, the spillable sub partition will write all in-memory * buffers to disk. All added buffers after that point directly go to disk. * *

This partition type is used for {@link ResultPartitionType#BLOCKING} * results that are fully produced before they can be consumed. At the point * when they are consumed, the buffers are (i) all in-memory, (ii) currently * being spilled to disk, or (iii) completely spilled to disk. Depending on * this state, different reader variants are returned (see * {@link SpillableSubpartitionView} and {@link SpilledSubpartitionView}). * *

Since the network buffer pool size for outgoing partitions is usually * quite small, e.g. via the {@link TaskManagerOptions#NETWORK_BUFFERS_PER_CHANNEL} * and {@link TaskManagerOptions#NETWORK_EXTRA_BUFFERS_PER_GATE} parameters * for bounded channels or from the default values of * {@link TaskManagerOptions#NETWORK_BUFFERS_MEMORY_FRACTION}, * {@link TaskManagerOptions#NETWORK_BUFFERS_MEMORY_MIN}, and * {@link TaskManagerOptions#NETWORK_BUFFERS_MEMORY_MAX}, most spillable partitions * will be spilled for real-world data sets. * *

Note on thread safety. Synchronizing on {@code buffers} is used to synchronize * writes and reads. Synchronizing on {@code this} is used against concurrent * {@link #add(BufferConsumer)} and clean ups {@link #release()} / {@link #finish()} which * also are touching {@code spillWriter}. Since we do not want to block reads during * spilling, we need those two synchronization. Probably this model could be simplified. */ class SpillableSubpartition extends ResultSubpartition { private static final Logger LOG = LoggerFactory.getLogger(SpillableSubpartition.class); /** The I/O manager used for spilling buffers to disk. */ private final IOManager ioManager; /** The writer used for spilling. As long as this is null, we are in-memory. */ private BufferFileWriter spillWriter; /** Flag indicating whether the subpartition has been finished. */ private boolean isFinished; /** Flag indicating whether the subpartition has been released. */ private volatile boolean isReleased; /** The read view to consume this subpartition. 
*/ private ResultSubpartitionView readView; private LinkedBlockingQueue unreleasedReadViews; SpillableSubpartition(int index, InternalResultPartition parent, IOManager ioManager) { this(index, parent, ioManager, false); } SpillableSubpartition( int index, InternalResultPartition parent, IOManager ioManager, boolean spillDirectly) { super(index, parent); this.ioManager = checkNotNull(ioManager); this.unreleasedReadViews = new LinkedBlockingQueue<>(); if (spillDirectly) { try { spillWriter = ioManager.createBufferFileWriter(ioManager.createChannel()); } catch (IOException e) { ExceptionUtils.rethrow(e); } } } @Override public synchronized boolean add(BufferConsumer bufferConsumer) throws IOException { return add(bufferConsumer, false); } private boolean add(BufferConsumer bufferConsumer, boolean forceFinishRemainingBuffers) throws IOException { checkNotNull(bufferConsumer); synchronized (buffers) { if (isFinished || isReleased) { bufferConsumer.close(); return false; } buffers.add(bufferConsumer); // The number of buffers are needed later when creating // the read views. If you ever remove this line here, // make sure to still count the number of buffers. 
updateStatistics(bufferConsumer); increaseBuffersInBacklog(bufferConsumer); if (spillWriter != null) { spillFinishedBufferConsumers(forceFinishRemainingBuffers); } } return true; } @Override public void flush() { synchronized (buffers) { if (readView != null) { readView.notifyDataAvailable(); } } } @Override public synchronized void finish() throws IOException { synchronized (buffers) { if (add(EventSerializer.toBufferConsumer(EndOfPartitionEvent.INSTANCE), true)) { isFinished = true; } flush(); } // If we are spilling/have spilled, wait for the writer to finish if (spillWriter != null) { spillWriter.close(); } } @Override public synchronized void release() throws IOException { // view reference accessible outside the lock, but assigned inside the locked scope final ResultSubpartitionView view; synchronized (buffers) { if (isReleased) { return; } // Release all available buffers for (BufferConsumer buffer : buffers) { buffer.close(); } buffers.clear(); // TODO This can block until all buffers are written out to // disk if a spill is in-progress before deleting the file. // It is possibly called from the Netty event loop threads, // which can bring down the network. 
if (spillWriter != null) { spillWriter.closeAndDelete(); } isReleased = true; } while (!unreleasedReadViews.isEmpty()) { unreleasedReadViews.poll().releaseAllResources(); } } @Override public ResultSubpartitionView createReadView(BufferAvailabilityListener availabilityListener) throws IOException { synchronized (buffers) { if (!isFinished) { throw new IllegalStateException("Subpartition has not been finished yet, " + "but blocking subpartitions can only be consumed after they have " + "been finished."); } checkState(!isReleased, "Subpartition has been released."); if (spillWriter != null) { readView = new SpilledSubpartitionView( this, parent.getBufferProvider().getMemorySegmentSize(), spillWriter, getTotalNumberOfBuffers(), availabilityListener); } else { if (readView != null) { throw new IllegalStateException("Subpartition is being or already has been " + "consumed, but we currently allow subpartitions to only be consumed once."); } readView = new SpillableSubpartitionView( this, buffers, ioManager, parent.getBufferProvider().getMemorySegmentSize(), availabilityListener); } unreleasedReadViews.add(readView); return readView; } } @Override public int releaseMemory() throws IOException { synchronized (buffers) { ResultSubpartitionView view = readView; if (view != null && view.getClass() == SpillableSubpartitionView.class) { // If there is a spillable view, it's the responsibility of the // view to release memory. 
SpillableSubpartitionView spillableView = (SpillableSubpartitionView) view; return spillableView.releaseMemory(); } else if (spillWriter == null) { // No view and in-memory => spill to disk spillWriter = ioManager.createBufferFileWriter(ioManager.createChannel()); int numberOfBuffers = buffers.size(); long spilledBytes = spillFinishedBufferConsumers(isFinished); int spilledBuffers = numberOfBuffers - buffers.size(); LOG.debug("Spilling {} bytes ({} buffers} for sub partition {} of {}.", spilledBytes, spilledBuffers, index, parent.getPartitionId()); return spilledBuffers; } } // Else: We have already spilled and don't hold any buffers return 0; } @VisibleForTesting long spillFinishedBufferConsumers(boolean forceFinishRemainingBuffers) throws IOException { long spilledBytes = 0; while (!buffers.isEmpty()) { BufferConsumer bufferConsumer = buffers.getFirst(); Buffer buffer = bufferConsumer.build(); updateStatistics(buffer); int bufferSize = buffer.getSize(); spilledBytes += bufferSize; // NOTE we may be in the process of finishing the subpartition where any buffer should // be treated as if it was finished! if (bufferConsumer.isFinished() || forceFinishRemainingBuffers) { if (bufferSize > 0) { spillWriter.writeBlock(buffer); } else { // If we skip a buffer for the spill writer, we need to adapt the backlog accordingly decreaseBuffersInBacklog(buffer); buffer.recycleBuffer(); } bufferConsumer.close(); buffers.poll(); } else { // If there is already data, we need to spill it anyway, since we do not get this // slice from the buffer consumer again during the next build. // BEWARE: by doing so, we increase the actual number of buffers in the spill writer! 
if (bufferSize > 0) { spillWriter.writeBlock(buffer); increaseBuffersInBacklog(bufferConsumer); } else { buffer.recycleBuffer(); } return spilledBytes; } } return spilledBytes; } @Override public boolean isReleased() { return isReleased; } @Override public int unsynchronizedGetNumberOfQueuedBuffers() { // since we do not synchronize, the size may actually be lower than 0! return Math.max(buffers.size(), 0); } @Override public String toString() { return String.format("SpillableSubpartition [%d number of buffers (%d bytes)," + "%d number of buffers in backlog, finished? %s, read view? %s, spilled? %s]", getTotalNumberOfBuffers(), getTotalNumberOfBytes(), getBuffersInBacklog(), isFinished, readView != null, spillWriter != null); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy