// org.dinky.shaded.paimon.table.source.AppendOnlySplitGenerator Maven / Gradle / Ivy
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.table.source;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.table.BucketMode;
import org.dinky.shaded.paimon.utils.BinPacking;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.function.Function;
import static org.dinky.shaded.paimon.append.AppendOnlyCompactManager.fileComparator;
/** Append only implementation of {@link SplitGenerator}. */
public class AppendOnlySplitGenerator implements SplitGenerator {
private final long targetSplitSize;
private final long openFileCost;
private final BucketMode bucketMode;
public AppendOnlySplitGenerator(
long targetSplitSize, long openFileCost, BucketMode bucketMode) {
this.targetSplitSize = targetSplitSize;
this.openFileCost = openFileCost;
this.bucketMode = bucketMode;
}
@Override
public List> splitForBatch(List input) {
List files = new ArrayList<>(input);
files.sort(fileComparator(bucketMode == BucketMode.UNAWARE));
Function weightFunc = file -> Math.max(file.fileSize(), openFileCost);
return BinPacking.packForOrdered(files, weightFunc, targetSplitSize);
}
@Override
public List> splitForStreaming(List files) {
// When the bucket mode is unaware, we spit the files as batch, because unaware-bucket table
// only contains one bucket (bucket 0).
if (bucketMode == BucketMode.UNAWARE) {
return splitForBatch(files);
} else {
return Collections.singletonList(files);
}
}
}