// org.dinky.shaded.paimon.utils.FileUtils Maven / Gradle / Ivy
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.utils;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.format.FormatReaderFactory;
import org.dinky.shaded.paimon.fs.FileIO;
import org.dinky.shaded.paimon.fs.FileStatus;
import org.dinky.shaded.paimon.fs.Path;
import org.dinky.shaded.paimon.reader.RecordReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ForkJoinWorkerThread;
import java.util.stream.Stream;
/** Utils for file reading and writing. */
public class FileUtils {

    /**
     * Shared pool for common file I/O work. Its parallelism equals the number of available
     * processors and its worker threads are named {@code file-store-common-io-<poolIndex>}.
     */
    public static final ForkJoinPool COMMON_IO_FORK_JOIN_POOL;

    // Naming the threads of a fork join pool requires a custom worker thread factory,
    // see https://stackoverflow.com/questions/34303094/
    static {
        ForkJoinPool.ForkJoinWorkerThreadFactory factory =
                pool -> {
                    ForkJoinWorkerThread worker =
                            ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool);
                    worker.setName("file-store-common-io-" + worker.getPoolIndex());
                    return worker;
                };
        // No uncaught-exception handler (null) and no async mode (false).
        COMMON_IO_FORK_JOIN_POOL =
                new ForkJoinPool(Runtime.getRuntime().availableProcessors(), factory, null, false);
    }

    /**
     * Reads every row of a file and converts each one into an object.
     *
     * @param fileIO file system access used to check and open the file
     * @param path file to read
     * @param serializer converts each row read from the file into a {@code T}
     * @param readerFactory creates the reader for the file
     * @param <T> type of the deserialized objects
     * @return the deserialized objects, in the order the reader yields the rows
     * @throws FileNotFoundException if {@code path} does not exist
     * @throws IOException if checking, opening or reading the file fails
     */
    public static <T> List<T> readListFromFile(
            FileIO fileIO,
            Path path,
            ObjectSerializer<T> serializer,
            FormatReaderFactory readerFactory)
            throws IOException {
        List<T> result = new ArrayList<>();
        createFormatReader(fileIO, readerFactory, path)
                .forEachRemaining(row -> result.add(serializer.fromRow(row)));
        return result;
    }

    /**
     * List versioned files for the directory.
     *
     * <p>The version is the part of the file name after {@code prefix}, parsed with {@link
     * Long#parseLong(String)}. Parsing happens when the stream is consumed, so a matching file
     * whose suffix is not a valid long makes the terminal operation throw {@link
     * NumberFormatException}.
     *
     * @param fileIO file system access used to list the directory
     * @param dir directory to list
     * @param prefix file name prefix that marks a versioned file
     * @return version stream, empty if {@code dir} does not exist
     * @throws IOException if the directory cannot be checked or listed
     */
    public static Stream<Long> listVersionedFiles(FileIO fileIO, Path dir, String prefix)
            throws IOException {
        return listOriginalVersionedFiles(fileIO, dir, prefix).map(Long::parseLong);
    }

    /**
     * List original versioned files for the directory.
     *
     * <p>Each element is the file name with the leading {@code prefix} removed, left unparsed.
     *
     * @param fileIO file system access used to list the directory
     * @param dir directory to list
     * @param prefix file name prefix that marks a versioned file
     * @return version stream, empty if {@code dir} does not exist
     * @throws IOException if the directory cannot be checked or listed
     */
    public static Stream<String> listOriginalVersionedFiles(
            FileIO fileIO, Path dir, String prefix) throws IOException {
        return listVersionedFileStatus(fileIO, dir, prefix)
                .map(FileStatus::getPath)
                .map(Path::getName)
                .map(name -> name.substring(prefix.length()));
    }

    /**
     * List versioned file status for the directory.
     *
     * <p>Only entries whose file name starts with {@code prefix} are kept.
     *
     * @param fileIO file system access used to list the directory
     * @param dir directory to list
     * @param prefix file name prefix that marks a versioned file
     * @return file status stream, empty if {@code dir} does not exist
     * @throws IOException if the directory cannot be checked or listed
     * @throws RuntimeException if listing the directory returns {@code null}
     */
    public static Stream<FileStatus> listVersionedFileStatus(
            FileIO fileIO, Path dir, String prefix) throws IOException {
        if (!fileIO.exists(dir)) {
            return Stream.empty();
        }

        FileStatus[] statuses = fileIO.listStatus(dir);

        // Fail with an explicit message instead of a NullPointerException further down.
        if (statuses == null) {
            throw new RuntimeException(
                    String.format(
                            "The return value is null of the listStatus for the '%s' directory.",
                            dir));
        }

        return Arrays.stream(statuses)
                .filter(status -> status.getPath().getName().startsWith(prefix));
    }

    /**
     * Verifies that a file exists.
     *
     * @param fileIO file system access used for the existence check
     * @param file file expected to exist
     * @throws FileNotFoundException if {@code file} does not exist; the message lists likely
     *     causes
     * @throws IOException if the existence check itself fails
     */
    public static void checkExists(FileIO fileIO, Path file) throws IOException {
        if (!fileIO.exists(file)) {
            throw new FileNotFoundException(
                    String.format(
                            "File '%s' not found, Possible causes: "
                                    + "1.snapshot expires too fast, you can configure 'snapshot.time-retained'"
                                    + " option with a larger value. "
                                    + "2.consumption is too slow, you can improve the performance of consumption"
                                    + " (For example, increasing parallelism).",
                            file));
        }
    }

    /**
     * Creates a reader for a file after verifying that the file exists.
     *
     * @param fileIO file system access used to check and open the file
     * @param format factory that creates the reader
     * @param file file to read
     * @return reader over the rows of {@code file}
     * @throws FileNotFoundException if {@code file} does not exist
     * @throws IOException if the existence check or the reader creation fails
     */
    public static RecordReader<InternalRow> createFormatReader(
            FileIO fileIO, FormatReaderFactory format, Path file) throws IOException {
        checkExists(fileIO, file);
        return format.createReader(fileIO, file);
    }
}