// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.io.network.partition.external.writer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.runtime.io.disk.iomanager.FileIOChannel;
import org.apache.flink.runtime.io.disk.iomanager.IOManager;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import org.apache.flink.runtime.io.network.partition.external.ExternalBlockShuffleUtils;
import org.apache.flink.runtime.io.network.partition.external.PartitionIndex;
import org.apache.flink.runtime.operators.sort.ChannelDeleteRegistry;
import org.apache.flink.runtime.operators.sort.DefaultFileMergePolicy;
import org.apache.flink.runtime.operators.sort.PartialOrderPriorityQueue;
import org.apache.flink.runtime.operators.sort.SortedDataFile;
import org.apache.flink.runtime.operators.sort.DataFileInfo;
import org.apache.flink.runtime.operators.sort.SortedDataFileMerger;
import org.apache.flink.runtime.operators.sort.MergePolicy;
import org.apache.flink.util.MutableObjectIterator;
import org.apache.flink.util.Preconditions;
import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
/**
* A merger who simply concats the part of the same partition together.
*/
public class ConcatPartitionedFileMerger implements SortedDataFileMerger> {
private static final Logger LOG = LoggerFactory.getLogger(ConcatPartitionedFileMerger.class);
private final int numberOfSubpartitions;
private final String partitionDataRootPath;
private final IOManager ioManager;
private final MergePolicy>> mergePolicy;
/** The merge file index starts from the max value to avoid using the same file id with spilled ones. */
private int mergeFileIndex = Integer.MAX_VALUE;
ConcatPartitionedFileMerger(int numberOfSubpartitions,
String partitionDataRootPath,
int mergeFactor,
boolean enableAsyncMerging,
boolean mergeToOneFile,
IOManager ioManager) {
Preconditions.checkArgument(numberOfSubpartitions > 0, "Illegal subpartition number: " + numberOfSubpartitions);
Preconditions.checkArgument(mergeFactor > 0, "Illegal merge factor: " + mergeFactor);
this.numberOfSubpartitions = numberOfSubpartitions;
this.partitionDataRootPath = Preconditions.checkNotNull(partitionDataRootPath);
this.ioManager = Preconditions.checkNotNull(ioManager);
this.mergePolicy = new DefaultFileMergePolicy<>(mergeFactor, enableAsyncMerging, mergeToOneFile);
}
@Override
public MutableObjectIterator> getMergingIterator(List>> sortedDataFiles,
List mergeReadMemory,
MutableObjectIterator> largeRecords,
ChannelDeleteRegistry> channelDeleteRegistry) throws IOException {
//TODO: will be implemented later.
return new MutableObjectIterator>() {
@Override
public Tuple2 next(Tuple2 reuse) throws IOException {
return null;
}
@Override
public Tuple2 next() throws IOException {
return null;
}
};
}
@Override
public void notifyNewSortedDataFile(SortedDataFile> sortedDataFile,
List writeMemory,
List mergeReadMemory,
ChannelDeleteRegistry> channelDeleteRegistry,
AtomicBoolean aliveFlag) throws IOException {
if (!(sortedDataFile instanceof PartitionedBufferSortedDataFile)) {
throw new IllegalArgumentException("Only PartitionedBufferSortedDataFile is supported: " + sortedDataFile.getClass().getName());
}
DataFileInfo>> dataFileInfo = new DataFileInfo<>(
sortedDataFile.getBytesWritten(), 0, numberOfSubpartitions, sortedDataFile);
mergePolicy.addNewCandidate(dataFileInfo);
mergeIfPossible(mergeReadMemory, channelDeleteRegistry, aliveFlag);
}
@Override
public List>> finishMerging(List writeMemory,
List mergeReadMemory,
ChannelDeleteRegistry> channelDeleteRegistry,
AtomicBoolean aliveFlag) throws IOException {
mergePolicy.startFinalMerge();
mergeIfPossible(mergeReadMemory, channelDeleteRegistry, aliveFlag);
return mergePolicy.getFinalMergeResult();
}
private void mergeIfPossible(List mergeReadMemory,
ChannelDeleteRegistry> channelDeleteRegistry,
AtomicBoolean aliveFlag) throws IOException {
// select merge candidates
List>>> mergeCandidates = mergePolicy.selectMergeCandidates(mergeReadMemory.size());
while (mergeCandidates != null && aliveFlag.get()) {
int maxMergeRound = 0;
LinkedList> toBeMerged = new LinkedList<>();
for (DataFileInfo>> mergeCandidate: mergeCandidates) {
maxMergeRound = Math.max(maxMergeRound, mergeCandidate.getMergeRound());
PartitionedSortedDataFile partitionedSortedDataFile = (PartitionedSortedDataFile) mergeCandidate.getDataFile();
toBeMerged.add(partitionedSortedDataFile);
}
LOG.info("Start merging {} files to one file.", toBeMerged.size());
try {
// merge the candidates to one file
SortedDataFile> mergedFile = mergeToOutput(
toBeMerged, mergeReadMemory, channelDeleteRegistry, mergeFileIndex--);
DataFileInfo>> mergedFileInfo = new DataFileInfo<>(
mergedFile.getBytesWritten(), maxMergeRound + 1, numberOfSubpartitions, mergedFile);
// notify new file
mergePolicy.addNewCandidate(mergedFileInfo);
} catch (InterruptedException e) {
throw new RuntimeException("Merge was interrupted.", e);
}
// select new candidates
mergeCandidates = mergePolicy.selectMergeCandidates(mergeReadMemory.size());
}
}
private ConcatPartitionedBufferSortedDataFile mergeToOutput(List> toBeMerged,
List mergeReadMemory,
ChannelDeleteRegistry> channelDeleteRegistry,
int fileId) throws IOException, InterruptedException {
// Create merged file writer.
final FileIOChannel.ID channel = ioManager.createChannel(new File(ExternalBlockShuffleUtils.generateMergePath(partitionDataRootPath, fileId)));
ConcatPartitionedBufferSortedDataFile writer = new ConcatPartitionedBufferSortedDataFile(
numberOfSubpartitions, channel, fileId, ioManager);
channelDeleteRegistry.registerChannelToBeDelete(channel);
channelDeleteRegistry.registerOpenChannel(writer.getWriteChannel());
// Create file readers.
final List> segments = Lists.partition(mergeReadMemory, mergeReadMemory.size() / toBeMerged.size());
final PartialOrderPriorityQueue heap = new PartialOrderPriorityQueue<>(
new PartitionIndexStreamComparator(), toBeMerged.size());
Set allReaders = new HashSet<>();
for (int i = 0; i < toBeMerged.size(); ++i) {
AsynchronousPartitionedStreamFileReaderDelegate readerDelegate =
new AsynchronousPartitionedStreamFileReaderDelegate(
ioManager, toBeMerged.get(i).getChannelID(), segments.get(i),
toBeMerged.get(i).getPartitionIndexList());
heap.add(new PartitionIndexStream(readerDelegate, toBeMerged.get(i).getPartitionIndexList()));
// will be used when closing read files
allReaders.add(readerDelegate);
// register the opened channel to be closed when error happens
channelDeleteRegistry.registerOpenChannel(readerDelegate.getReader());
}
while (heap.size() > 0) {
final PartitionIndexStream headStream = heap.peek();
final PartitionIndex partitionIndex = headStream.getCurrentPartitionIndex();
if (!headStream.advance()) {
heap.poll();
} else {
heap.adjustTop();
}
// now read the specific length of data
long readLength = 0;
while (readLength < partitionIndex.getLength()) {
Buffer buffer = headStream.getReader().getNextBufferBlocking();
readLength += buffer.getSize();
writer.writeBuffer(partitionIndex.getSubpartition(), buffer);
}
assert readLength == partitionIndex.getLength();
}
writer.finishWriting();
channelDeleteRegistry.unregisterOpenChannel(writer.getWriteChannel());
clearMerged(channelDeleteRegistry, allReaders);
return writer;
}
private void clearMerged(ChannelDeleteRegistry> channelDeleteRegistry,
Set allReaders) throws IOException {
// close the reader and delete the underlying file for already merged channels
for (AsynchronousPartitionedStreamFileReaderDelegate reader : allReaders) {
// close the file reader
reader.close();
channelDeleteRegistry.unregisterOpenChannel(reader.getReader());
// delete the file
reader.getReader().deleteChannel();
channelDeleteRegistry.unregisterChannelToBeDelete(reader.getReader().getChannelID());
}
allReaders.clear();
}
private static final class PartitionIndexStream {
private final AsynchronousPartitionedStreamFileReaderDelegate reader;
private final List partitionIndices;
private int offset;
public PartitionIndexStream(AsynchronousPartitionedStreamFileReaderDelegate reader,
List partitionIndices) {
this.reader = reader;
this.partitionIndices = partitionIndices;
this.offset = 0;
}
public PartitionIndex getCurrentPartitionIndex() {
return partitionIndices.get(offset);
}
public AsynchronousPartitionedStreamFileReaderDelegate getReader() {
return reader;
}
public boolean advance() {
if (offset < partitionIndices.size() - 1) {
offset++;
return true;
}
return false;
}
@Override
public String toString() {
return "PartitionIndexStream{" +
"partitionIndices=" + partitionIndices.size() +
", offset=" + offset +
'}';
}
}
private static final class PartitionIndexStreamComparator implements Comparator {
@Override
public int compare(PartitionIndexStream first, PartitionIndexStream second) {
int firstPartition = first.getCurrentPartitionIndex().getSubpartition();
int secondPartition = second.getCurrentPartitionIndex().getSubpartition();
if (firstPartition != secondPartition) {
return firstPartition < secondPartition ? -1 : 1;
}
long firstStart = first.getCurrentPartitionIndex().getStartOffset();
long secondStart = second.getCurrentPartitionIndex().getStartOffset();
return Long.compare(firstStart, secondStart);
}
}
}