// com.netease.arctic.shade.org.apache.iceberg.BaseFileScanTask -- Maven / Gradle / Ivy
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.netease.arctic.shade.org.apache.iceberg;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import com.netease.arctic.shade.org.apache.iceberg.expressions.Expression;
import com.netease.arctic.shade.org.apache.iceberg.expressions.ResidualEvaluator;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Lists;
class BaseFileScanTask implements FileScanTask {
private final DataFile file;
private final DeleteFile[] deletes;
private final String schemaString;
private final String specString;
private final ResidualEvaluator residuals;
private transient PartitionSpec spec = null;
BaseFileScanTask(DataFile file, DeleteFile[] deletes, String schemaString, String specString,
ResidualEvaluator residuals) {
this.file = file;
this.deletes = deletes != null ? deletes : new DeleteFile[0];
this.schemaString = schemaString;
this.specString = specString;
this.residuals = residuals;
}
@Override
public DataFile file() {
return file;
}
@Override
public List deletes() {
return ImmutableList.copyOf(deletes);
}
@Override
public PartitionSpec spec() {
if (spec == null) {
this.spec = PartitionSpecParser.fromJson(SchemaParser.fromJson(schemaString), specString);
}
return spec;
}
@Override
public long start() {
return 0;
}
@Override
public long length() {
return file.fileSizeInBytes();
}
@Override
public Expression residual() {
return residuals.residualFor(file.partition());
}
@Override
public Iterable split(long targetSplitSize) {
if (file.format().isSplittable()) {
if (file.splitOffsets() != null) {
return () -> new OffsetsAwareTargetSplitSizeScanTaskIterator(file.splitOffsets(), this);
} else {
return () -> new FixedSizeSplitScanTaskIterator(targetSplitSize, this);
}
}
return ImmutableList.of(this);
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("file", file.path())
.add("partition_data", file.partition())
.add("residual", residual())
.toString();
}
/**
* This iterator returns {@link FileScanTask} using guidance provided by split offsets.
*/
@VisibleForTesting
static final class OffsetsAwareTargetSplitSizeScanTaskIterator implements Iterator {
private final List offsets;
private final List splitSizes;
private final FileScanTask parentScanTask;
private int sizeIdx = 0;
OffsetsAwareTargetSplitSizeScanTaskIterator(List offsetList, FileScanTask parentScanTask) {
this.offsets = ImmutableList.copyOf(offsetList);
this.parentScanTask = parentScanTask;
this.splitSizes = Lists.newArrayListWithCapacity(offsets.size());
if (offsets.size() > 0) {
int lastIndex = offsets.size() - 1;
for (int index = 0; index < lastIndex; index++) {
splitSizes.add(offsets.get(index + 1) - offsets.get(index));
}
splitSizes.add(parentScanTask.length() - offsets.get(lastIndex));
}
}
@Override
public boolean hasNext() {
return sizeIdx < splitSizes.size();
}
@Override
public FileScanTask next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
int offsetIdx = sizeIdx;
long currentSize = splitSizes.get(sizeIdx);
sizeIdx += 1; // Create 1 split per offset
FileScanTask combinedTask = new SplitScanTask(offsets.get(offsetIdx), currentSize, parentScanTask);
return combinedTask;
}
}
@VisibleForTesting
static final class FixedSizeSplitScanTaskIterator implements Iterator {
private long offset;
private long remainingLen;
private long splitSize;
private final FileScanTask fileScanTask;
FixedSizeSplitScanTaskIterator(long splitSize, FileScanTask fileScanTask) {
this.offset = 0;
this.remainingLen = fileScanTask.length();
this.splitSize = splitSize;
this.fileScanTask = fileScanTask;
}
@Override
public boolean hasNext() {
return remainingLen > 0;
}
@Override
public FileScanTask next() {
long len = Math.min(splitSize, remainingLen);
final FileScanTask splitTask = new SplitScanTask(offset, len, fileScanTask);
offset += len;
remainingLen -= len;
return splitTask;
}
}
private static final class SplitScanTask implements FileScanTask {
private final long len;
private final long offset;
private final FileScanTask fileScanTask;
SplitScanTask(long offset, long len, FileScanTask fileScanTask) {
this.offset = offset;
this.len = len;
this.fileScanTask = fileScanTask;
}
@Override
public DataFile file() {
return fileScanTask.file();
}
@Override
public List deletes() {
return fileScanTask.deletes();
}
@Override
public PartitionSpec spec() {
return fileScanTask.spec();
}
@Override
public long start() {
return offset;
}
@Override
public long length() {
return len;
}
@Override
public Expression residual() {
return fileScanTask.residual();
}
@Override
public Iterable split(long splitSize) {
throw new UnsupportedOperationException("Cannot split a task which is already split");
}
public boolean isAdjacent(SplitScanTask other) {
return (other != null) &&
(this.file().equals(other.file())) &&
(this.offset + this.len == other.offset);
}
}
static List combineAdjacentTasks(List tasks) {
if (tasks.isEmpty()) {
return Collections.emptyList();
}
List combinedScans = Lists.newArrayList();
SplitScanTask lastSplit = null;
for (FileScanTask fileScanTask : tasks) {
if (!(fileScanTask instanceof SplitScanTask)) {
// Return any tasks not produced by split un-modified
combinedScans.add(fileScanTask);
} else {
SplitScanTask split = (SplitScanTask) fileScanTask;
if (lastSplit != null) {
if (lastSplit.isAdjacent(split)) {
// Merge with the last split
lastSplit = new SplitScanTask(
lastSplit.offset,
lastSplit.len + split.len,
lastSplit.fileScanTask);
} else {
// Last split is not adjacent, add it to finished adjacent groups
combinedScans.add(lastSplit);
lastSplit = split;
}
} else {
// First split
lastSplit = split;
}
}
}
if (lastSplit != null) {
combinedScans.add(lastSplit);
}
return combinedScans;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy