org.apache.flink.runtime.io.network.partition.external.MergedPartitionIndices Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.io.network.partition.external;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataInputViewStreamWrapper;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicReference;
import static
org.apache.flink.runtime.io.network.partition.external.ExternalBlockResultPartitionMeta.ExternalSubpartitionMeta;
import static org.apache.flink.util.Preconditions.checkArgument;
/**
* Holds PartitionIndices of one partition file in the format of {@code MERGED_PARTITION_FILE}.
*/
public class MergedPartitionIndices implements PartitionIndices {

    /** Size in bytes of one cached offset entry (one {@code long}). */
    private static final long BYTES_PER_OFFSET = 8L;

    /** File system of the data and index files for this partition. */
    private final FileSystem fileSystem;

    /** The path of result partition index file. */
    private final Path indexFilePath;

    /** The path of result partition data file. */
    private final Path dataFilePath;

    /** The number of subpartitions. */
    private final int subpartitionNum;

    /**
     * The array of offsets at which subpartitions start in the partition file. It has
     * {@code subpartitionNum + 1} elements; the last one stores the overall length. The
     * reference holds {@code null} until the indices are loaded and again after
     * {@link #shrinkMemoryFootprint()} has released them.
     */
    @VisibleForTesting
    protected final AtomicReference<long[]> offsetArrayRef = new AtomicReference<>();

    /**
     * Creates the indices holder for one spilled partition file.
     *
     * <p>The index and data file paths are derived from {@code partitionDir} and
     * {@code spillIdx} via {@code ExternalBlockShuffleUtils}. Nothing is read from the
     * file system here.
     *
     * @param fileSystem the file system used to check and open the index file
     * @param partitionDir the directory of the result partition
     * @param spillIdx the index of the spill whose files are described
     * @param subpartitionNum the number of subpartitions
     */
    public MergedPartitionIndices(FileSystem fileSystem, String partitionDir, int spillIdx, int subpartitionNum) {
        this.fileSystem = fileSystem;
        this.indexFilePath = new Path(ExternalBlockShuffleUtils.generateIndexPath(partitionDir, spillIdx));
        this.dataFilePath = new Path(ExternalBlockShuffleUtils.generateDataPath(partitionDir, spillIdx));
        this.subpartitionNum = subpartitionNum;
    }

    /**
     * Loads the offsets from the index file and caches them.
     *
     * @throws IOException if the index file does not exist or cannot be read or validated
     */
    public void initialize() throws IOException {
        this.offsetArrayRef.set(loadPartitionIndices());
    }

    /**
     * Returns the location of one subpartition inside the data file.
     *
     * <p>If the cached offsets are absent, they are loaded from the index file again and
     * cached before the lookup.
     *
     * @param subpartitionIndex the subpartition to look up, in {@code [0, subpartitionNum)};
     *     other values are rejected by {@code checkArgument}
     * @return the meta built from the data file path, the start offset and the length in bytes
     * @throws IOException if the offsets have to be reloaded and loading fails
     */
    public ExternalSubpartitionMeta getSubpartitionMeta(int subpartitionIndex) throws IOException {
        checkArgument(subpartitionIndex >= 0 && subpartitionIndex < subpartitionNum, "Invalid subpartition index.");

        long[] tmpOffsetArray = offsetArrayRef.get();
        // offsetArrayRef can be set null to make it GCable.
        if (tmpOffsetArray == null) {
            tmpOffsetArray = loadPartitionIndices();
            offsetArrayRef.set(tmpOffsetArray);
        }

        // The length is the distance to the next start offset; the extra last element
        // (overall length) makes this work for the final subpartition as well.
        long offset = tmpOffsetArray[subpartitionIndex];
        return new ExternalSubpartitionMeta(dataFilePath, offset, tmpOffsetArray[subpartitionIndex + 1] - offset);
    }

    /**
     * Drops the cached offsets so that they can be garbage collected. A later call to
     * {@link #getSubpartitionMeta(int)} reloads them from the index file.
     *
     * @return the number of bytes attributed to the released offsets, or 0 if nothing was cached
     */
    public long shrinkMemoryFootprint() {
        long[] tmpOffsetArray = offsetArrayRef.get();
        if (tmpOffsetArray != null) {
            offsetArrayRef.lazySet(null);
            return offsetArrayBytes();
        } else {
            return 0;
        }
    }

    /**
     * Reports how many bytes {@link #shrinkMemoryFootprint()} would currently release.
     *
     * @return the number of bytes attributed to the cached offsets, or 0 if nothing is cached
     */
    public long getShrinkableMemoryFootprint() {
        long[] tmpOffsetArray = offsetArrayRef.get();
        if (tmpOffsetArray != null) {
            return offsetArrayBytes();
        } else {
            return 0;
        }
    }

    /**
     * Computes the payload size of the offset array in bytes. The arithmetic is done in
     * {@code long} so that large subpartition counts cannot overflow {@code int}.
     */
    private long offsetArrayBytes() {
        return BYTES_PER_OFFSET * (subpartitionNum + 1L);
    }

    /**
     * Reads the index file and builds the offset array.
     *
     * <p>The file is read as an {@code int} entry count followed by that many entries, each
     * consisting of an {@code int} partition id, a {@code long} start offset and a {@code long}
     * length. Entries must appear in strictly ascending partition id order with ids in
     * {@code [0, subpartitionNum)}, and each start offset must equal the end of the previous
     * entry. Partitions without an entry get a zero-length range at the current offset.
     *
     * @return an array of {@code subpartitionNum + 1} offsets whose last element is the overall length
     * @throws IOException if the index file does not exist, cannot be read, or has invalid content
     */
    private long[] loadPartitionIndices() throws IOException {
        // Checks whether index files exist
        if (!fileSystem.exists(indexFilePath)) {
            throw new IOException("Index file doesn't exist, file path: " + indexFilePath.getPath());
        }

        // Loads PartitionIndices from index files
        try (FSDataInputStream indexIn = fileSystem.open(indexFilePath)) {
            DataInputView indexView = new DataInputViewStreamWrapper(indexIn);

            // Gets the number of partitions indices in the index file.
            final int size = indexView.readInt();

            // One extra slot: the overall length is stored in the last element of the array.
            long[] tmpOffsetArray = new long[subpartitionNum + 1];
            int nextPartitionId = 0;
            long nextOffset = 0L;

            for (int i = 0; i < size; i++) {
                int partitionId = indexView.readInt();

                // Ids must not go backwards or repeat.
                if (partitionId < nextPartitionId) {
                    throw new IOException("Got invalid partition id, expected nextPartitionId: "
                        + nextPartitionId + ", real partitionId: " + partitionId);
                }
                // Reject ids beyond the declared subpartition count before they are used as
                // array indices, so a corrupt index file surfaces as an IOException.
                if (partitionId >= subpartitionNum) {
                    throw new IOException("Partition id is out of range, partition id: " + partitionId
                        + ", number of subpartitions: " + subpartitionNum);
                }

                // Fills offsets of non-existent partitions in case of noncontinuous partition IDs.
                while (nextPartitionId < partitionId) {
                    tmpOffsetArray[nextPartitionId++] = nextOffset;
                }

                long startOffset = indexView.readLong();
                long lengthOfPartition = indexView.readLong();
                if (nextOffset != startOffset) {
                    throw new IOException("Offset is not continuous, partition id: " + partitionId
                        + ", expected nextOffset: " + nextOffset + ", real startOffset: " + startOffset);
                }
                // A negative length would make offsets decrease and yield negative subpartition lengths.
                if (lengthOfPartition < 0) {
                    throw new IOException("Got negative partition length, partition id: " + partitionId
                        + ", length: " + lengthOfPartition);
                }

                tmpOffsetArray[nextPartitionId++] = startOffset;
                nextOffset += lengthOfPartition;
            }

            // Trailing partitions without an entry are empty and start at the end of the data.
            while (nextPartitionId < subpartitionNum) {
                tmpOffsetArray[nextPartitionId++] = nextOffset;
            }

            // Stores the overall length in the last element of the array.
            checkArgument(nextPartitionId == subpartitionNum);
            tmpOffsetArray[subpartitionNum] = nextOffset;
            return tmpOffsetArray;
        } catch (IOException e) {
            throw new IOException("Cannot read index file, file path: " + indexFilePath.getPath(), e);
        }
    }
}