All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.io.network.partition.external.writer.PartitionMergeFileWriter Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.io.network.partition.external.writer;

import org.apache.flink.api.common.typeutils.TypeComparator;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.base.IntComparator;
import org.apache.flink.api.common.typeutils.base.IntSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.runtime.DuplicateOnlySerializerFactory;
import org.apache.flink.api.java.typeutils.runtime.TupleComparator;
import org.apache.flink.api.java.typeutils.runtime.TupleSerializer;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.metrics.Counter;
import org.apache.flink.runtime.io.disk.iomanager.IOManager;
import org.apache.flink.runtime.io.network.api.serialization.SerializerManager;
import org.apache.flink.runtime.io.network.partition.external.ExternalBlockShuffleUtils;
import org.apache.flink.runtime.io.network.partition.external.PartitionIndex;
import org.apache.flink.runtime.io.network.partition.external.PersistentFileType;
import org.apache.flink.runtime.jobgraph.tasks.AbstractInvokable;
import org.apache.flink.runtime.memory.MemoryAllocationException;
import org.apache.flink.runtime.memory.MemoryManager;
import org.apache.flink.runtime.operators.sort.PushedUnilateralSortMerger;
import org.apache.flink.runtime.operators.sort.SortedDataFile;
import org.apache.flink.runtime.operators.sort.SortedDataFileMerger;
import org.apache.flink.runtime.plugable.SerializationDelegate;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import static org.apache.flink.util.Preconditions.checkArgument;

/**
 * Shuffle writing using the outer sort-merger.
 */
public class PartitionMergeFileWriter implements PersistentFileWriter {
	private static final Logger LOG = LoggerFactory.getLogger(PartitionMergeFileWriter.class);

	private final String partitionDataRootPath;

	private final TypeSerializer typeSerializer;

	private final List allMemory;

	private final Tuple2 reuse = new Tuple2<>();

	private final PushedUnilateralSortMerger> sortMerger;

	public PartitionMergeFileWriter(
		int numPartitions,
		String partitionDataRootPath,
		int mergeFactor,
		boolean enableAsyncMerging,
		boolean mergeToOneFile,
		boolean enableDoubleBuffer,
		MemoryManager memoryManager,
		List memory,
		IOManager ioManager,
		TypeSerializer serializer,
		SerializerManager> serializerManager,
		AbstractInvokable parentTask) throws IOException, MemoryAllocationException {

		this(numPartitions, partitionDataRootPath, mergeFactor, enableAsyncMerging,
			mergeToOneFile, enableDoubleBuffer, memoryManager, memory, ioManager,
			serializer, serializerManager, parentTask, null, null);
	}

	public PartitionMergeFileWriter(
		int numPartitions,
		String partitionDataRootPath,
		int mergeFactor,
		boolean enableAsyncMerging,
		boolean mergeToOneFile,
		boolean enableDoubleBuffer,
		MemoryManager memoryManager,
		List memory,
		IOManager ioManager,
		TypeSerializer serializer,
		SerializerManager> serializerManager,
		AbstractInvokable parentTask,
		Counter numBytesOut,
		Counter numBuffersOut) throws IOException, MemoryAllocationException {
		checkArgument(numPartitions > 0,
			"The number of subpartitions should be larger than 0, but actually is: " + numPartitions);
		checkArgument(mergeFactor >= 2, "Illegal merge factor: " + mergeFactor);

		this.partitionDataRootPath = partitionDataRootPath;

		this.typeSerializer = serializer;

		this.allMemory = memory;

		// Create the sort merger
		Class> typedTuple = (Class>) (Class) Tuple2.class;
		TypeSerializer[] serializers = new TypeSerializer[]{IntSerializer.INSTANCE, serializer.duplicate()};
		TypeSerializer> tuple2Serializer = new TupleSerializer<>(typedTuple, serializers);
		DuplicateOnlySerializerFactory> serializerFactory = new DuplicateOnlySerializerFactory<>(tuple2Serializer);

		int[] keyPositions = new int[]{0};
		TypeComparator[] comparators = new TypeComparator[]{new IntComparator(true)};
		TupleComparator> tuple2Comparator = new TupleComparator<>(
			keyPositions, comparators, serializers);

		BufferSortedDataFileFactory sortedDataFileFactory = new BufferSortedDataFileFactory<>(
			partitionDataRootPath, typeSerializer, ioManager, serializerManager, numBytesOut, numBuffersOut);

		PartitionedBufferSortedDataFileFactory partitionedBufferSortedDataFileFactory =
			new PartitionedBufferSortedDataFileFactory(sortedDataFileFactory, numPartitions);

		SortedDataFileMerger> merger = new ConcatPartitionedFileMerger(
			numPartitions, partitionDataRootPath, mergeFactor, enableAsyncMerging, mergeToOneFile, ioManager);

		sortMerger = new PushedUnilateralSortMerger<>(partitionedBufferSortedDataFileFactory, merger,
			memoryManager, allMemory, ioManager, parentTask, serializerFactory, tuple2Comparator,
			0, mergeFactor, false, 0, false, true, true, enableAsyncMerging, enableDoubleBuffer);

		LOG.info("External result partition writer initialized.");
	}

	@Override
	public void add(T record, int targetPartition) throws IOException {
		reuse.f1 = record;
		reuse.f0 = targetPartition;
		sortMerger.add(reuse);
	}

	@Override
	public void add(T record, int[] targetPartitions) throws IOException {
		reuse.f1 = record;
		for (int partition : targetPartitions) {
			reuse.f0 = partition;
			sortMerger.add(reuse);
		}
	}

	@Override
	public void finish() throws IOException, InterruptedException {
		sortMerger.finishAdding();

		List>> remainFiles = sortMerger.getRemainingSortedDataFiles();

		int nextFileId = 0;
		FileSystem localFileSystem = FileSystem.getLocalFileSystem();
		for (SortedDataFile> file : remainFiles) {
			// rename file
			localFileSystem.rename(
				new Path(file.getChannelID().getPath()),
				new Path(ExternalBlockShuffleUtils.generateDataPath(partitionDataRootPath, nextFileId++)));
		}

		LOG.info("Finish external result partition writing.");
	}

	@Override
	public List> generatePartitionIndices() throws IOException, InterruptedException {
		List> partitionIndices = new ArrayList<>();
		List>> remainFiles = sortMerger.getRemainingSortedDataFiles();

		for (SortedDataFile> file : remainFiles) {
			if (!(file instanceof PartitionedSortedDataFile)) {
				throw new IllegalStateException("Unexpected file type.");
			}

			partitionIndices.add(((PartitionedSortedDataFile) file).getPartitionIndexList());
		}

		return partitionIndices;
	}

	@Override
	public void clear() throws IOException {
		// nothing to do
	}

	@Override
	public PersistentFileType getExternalFileType() {
		return PersistentFileType.MERGED_PARTITION_FILE;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy